From 37d4d4c0d525451e4cd449b83b227341d11bd2f0 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 8 Dec 2021 14:48:26 -0600 Subject: [PATCH 01/51] add test for reshape --- test/pytest/test_reshape.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100755 test/pytest/test_reshape.py diff --git a/test/pytest/test_reshape.py b/test/pytest/test_reshape.py new file mode 100755 index 0000000000..505f225547 --- /dev/null +++ b/test/pytest/test_reshape.py @@ -0,0 +1,36 @@ +""" Test that reshape is properly handled by optimizers. +""" + +import pytest +import hls4ml +import tensorflow as tf +import numpy as np +from tensorflow.keras import optimizers +from tensorflow.keras.layers import Input, Dense, Reshape, Softmax + + +def test_reshape_parallel(): + model = tf.keras.models.Sequential([ + tf.keras.layers.Input((10)), + tf.keras.layers.Dense(10*3), + tf.keras.layers.Reshape((10,3)), + tf.keras.layers.ReLU() + ]) + model.compile(optimizer='adam', loss='mse') + config = hls4ml.utils.config_from_keras_model(model) + output_dir = 'hls4mlprj_reshape_parallel' + hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir) + hls_model.compile() + +def test_reshape_stream(): + model = tf.keras.models.Sequential([ + tf.keras.layers.Input((10)), + tf.keras.layers.Dense(10*3), + tf.keras.layers.Reshape((10,3)), + tf.keras.layers.ReLU() + ]) + model.compile(optimizer='adam', loss='mse') + config = hls4ml.utils.config_from_keras_model(model) + output_dir = 'hls4mlprj_reshape_stream' + hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir, io_type='io_stream') + hls_model.compile() From 7c4b6c7fe2827abe7bc5d248c67b8e5a97243793 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 11 Apr 2022 14:18:50 -0500 Subject: [PATCH 02/51] snapshot of trying to fix inplace variables --- hls4ml/backends/fpga/fpga_types.py | 29 ------------------- .../quartus/passes/transform_types.py | 9 ++---- hls4ml/backends/template.py | 2 +- .../backends/vivado/passes/repack_stream.py | 14 +++++++++ .../vivado/passes/reshaping_templates.py | 27 ++++++++++++++++- .../backends/vivado/passes/transform_types.py | 8 ++--- hls4ml/backends/vivado/vivado_backend.py | 1 + hls4ml/model/attributes.py | 6 ++-- hls4ml/model/layers.py | 8 ++--- hls4ml/model/types.py | 16 ---------- 10 files changed, 53 insertions(+), 67 deletions(-) diff --git a/hls4ml/backends/fpga/fpga_types.py b/hls4ml/backends/fpga/fpga_types.py index 68eeecfde6..3efbea1592 100644 --- a/hls4ml/backends/fpga/fpga_types.py +++ b/hls4ml/backends/fpga/fpga_types.py @@ -282,35 +282,6 @@ def __init__(self, type_converter): #endregion -#region InplaceVariable - -class InplaceVariableConverter(object): - def __init__(self, type_converter, prefix): - self.type_converter = type_converter - self.prefix = prefix - - def convert(self, tensor_var, io_type): - if tensor_var.__class__.__name__.startswith(self.prefix): # Already converted - return tensor_var - - if io_type == 'io_stream': - tensor_var.type = self.type_converter.convert(PackedType(tensor_var.type.name, tensor_var.type.precision, tensor_var.shape[-1], n_pack=1)) - else: - tensor_var.type = self.type_converter.convert(tensor_var.type) - - tensor_var.__class__ = type(self.prefix + 'InplaceVariable', (type(tensor_var),), {}) - return tensor_var - -class VivadoInplaceVariableConverter(InplaceVariableConverter): - def __init__(self, type_converter): - 
super().__init__(type_converter=type_converter, prefix='Vivado') - -class QuartusInplaceVariableConverter(InplaceVariableConverter): - def __init__(self, type_converter): - super().__init__(type_converter=type_converter, prefix='Quartus') - -#endregion - #region WeightsVariable class StaticWeightVariableDefinition(VariableDefinition): diff --git a/hls4ml/backends/quartus/passes/transform_types.py b/hls4ml/backends/quartus/passes/transform_types.py index 3591a63655..d1d7b01693 100644 --- a/hls4ml/backends/quartus/passes/transform_types.py +++ b/hls4ml/backends/quartus/passes/transform_types.py @@ -1,7 +1,8 @@ from hls4ml.model.optimizer import GlobalOptimizerPass -from hls4ml.model.types import InplaceVariable -from hls4ml.backends.fpga.fpga_types import ACTypeConverter, QuartusArrayVariableConverter, HLSTypeConverter, QuartusInplaceVariableConverter, QuartusStructMemberVariableConverter, StaticWeightVariableConverter +from hls4ml.backends.fpga.fpga_types import ( + ACTypeConverter, QuartusArrayVariableConverter, HLSTypeConverter, + QuartusStructMemberVariableConverter, StaticWeightVariableConverter) class TransformTypes(GlobalOptimizerPass): @@ -10,15 +11,11 @@ def __init__(self): self.array_var_converter = QuartusArrayVariableConverter(type_converter=self.type_converter) self.struct_var_converter = QuartusStructMemberVariableConverter(type_converter=self.type_converter) self.weight_var_converter = StaticWeightVariableConverter(type_converter=self.type_converter) - self.inplace_var_converter = QuartusInplaceVariableConverter(type_converter=self.type_converter) def transform(self, model, node): io_type = node.model.config.get_config_value('IOType') for out_name, var in node.variables.items(): - if isinstance(var, InplaceVariable): - new_var = self.inplace_var_converter.convert(var, io_type) - if io_type == 'io_stream': raise Exception('Streaming IO is not supported in Quartus.') elif io_type == 'io_parallel': diff --git a/hls4ml/backends/template.py b/hls4ml/backends/template.py index c48b94160a..569e84d7ae 100644 --- a/hls4ml/backends/template.py +++ b/hls4ml/backends/template.py @@ -45,7 +45,7 @@ def _default_config_params(self, layer): return params class FunctionCallTemplate(Template): - def __init__(self, layer_class, include_header=None): + def __init__(self, layer_class, include_header=[]): if isinstance(layer_class, (list, tuple, set)): name = '_'.join([cls.__name__.lower() for cls in layer_class]) else: diff --git a/hls4ml/backends/vivado/passes/repack_stream.py b/hls4ml/backends/vivado/passes/repack_stream.py index 80339f4749..4aee53e188 100644 --- a/hls4ml/backends/vivado/passes/repack_stream.py +++ b/hls4ml/backends/vivado/passes/repack_stream.py @@ -80,6 +80,7 @@ def register_repack_stream(backend): # Register the optimization passes backend.register_pass('remove_final_reshape', RemoveFinalReshape) backend.register_pass('reshape_stream', ReshapeStream) + backend.register_pass('eliminate_flatten_stream', EliminateFlattenStream) backend.register_pass('broadcast_stream', BroadcastStream) # Register template passes @@ -107,6 +108,19 @@ def transform(self, model, node): return True +class EliminateFlattenStream(OptimizerPass): + ''' Remove Flatten layer in io_stream ''' + def match(self, node): + # optimizer pass for a flatten layer (1 output dimension) + return isinstance(node, Reshape) and len(node.get_output_variable().shape) == 1 + + def transform(self, model, node): + if model.config.get_config_value('IOType') != 'io_stream': + return False + + model.remove_node(node) + return 
True + class BroadcastStream(OptimizerPass): def match(self, node): if isinstance(node, Merge): diff --git a/hls4ml/backends/vivado/passes/reshaping_templates.py b/hls4ml/backends/vivado/passes/reshaping_templates.py index 53c48a5f34..c11c349c3f 100644 --- a/hls4ml/backends/vivado/passes/reshaping_templates.py +++ b/hls4ml/backends/vivado/passes/reshaping_templates.py @@ -1,5 +1,5 @@ -from hls4ml.model.layers import ZeroPadding1D, ZeroPadding2D, Resize, Transpose +from hls4ml.model.layers import ZeroPadding1D, ZeroPadding2D, Resize, Transpose, Reshape from hls4ml.backends.template import LayerConfigTemplate, FunctionCallTemplate # ZeroPadding templates @@ -125,3 +125,28 @@ def format(self, node): params['dim'] = node.get_attr('dim') return self.template.format(**params) + +# Remaining reshapes only exist in io_parallel. +# Given how the data is stored in a 1D array, changing +# the shape does not add any code + +reshape_function_template = 'auto& {output} = {input};' + +class ReshapeConfigTemplate(LayerConfigTemplate): + def __init__(self): + super().__init__(Reshape) + self.template = '' + + def format(self, node): + return self.template + +class ReshapeFucntionTemplate(FunctionCallTemplate): + def __init__(self): + super().__init__(Reshape) + self.template = reshape_function_template + + def format(self, node): + params = {} + params['input'] = node.get_input_variable().name + params['output'] = node.get_output_variable().name + return self.template.format(**params) diff --git a/hls4ml/backends/vivado/passes/transform_types.py b/hls4ml/backends/vivado/passes/transform_types.py index b9892c3b11..761d221135 100644 --- a/hls4ml/backends/vivado/passes/transform_types.py +++ b/hls4ml/backends/vivado/passes/transform_types.py @@ -1,7 +1,8 @@ from hls4ml.model.optimizer import GlobalOptimizerPass -from hls4ml.model.types import InplaceVariable -from hls4ml.backends.fpga.fpga_types import APTypeConverter, HLSTypeConverter, StaticWeightVariableConverter, VivadoArrayVariableConverter, VivadoInplaceVariableConverter, VivadoStreamVariableConverter +from hls4ml.backends.fpga.fpga_types import ( + APTypeConverter, HLSTypeConverter, StaticWeightVariableConverter, + VivadoArrayVariableConverter, VivadoStreamVariableConverter) class TransformTypes(GlobalOptimizerPass): @@ -10,14 +11,11 @@ def __init__(self): self.array_var_converter = VivadoArrayVariableConverter(type_converter=self.type_converter) self.stream_var_converter = VivadoStreamVariableConverter(type_converter=self.type_converter) self.weight_var_converter = StaticWeightVariableConverter(type_converter=self.type_converter) - self.inplace_var_converter = VivadoInplaceVariableConverter(type_converter=self.type_converter) def transform(self, model, node): io_type = node.model.config.get_config_value('IOType') for out_name, var in node.variables.items(): - if isinstance(var, InplaceVariable): - new_var = self.inplace_var_converter.convert(var, io_type) if io_type == 'io_stream': new_var = self.stream_var_converter.convert(var) elif io_type == 'io_serial': diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index a980d33ec9..8f0038a7ce 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -27,6 +27,7 @@ def _register_flows(self): streaming_passes = [ 'vivado:remove_final_reshape', 'vivado:reshape_stream', + 'vivado:eliminate_flatten_stream', 'vivado:clone_output', 'vivado:insert_zero_padding_before_conv1d', 'vivado:insert_zero_padding_before_conv2d', diff 
--git a/hls4ml/model/attributes.py b/hls4ml/model/attributes.py index 40bdec3383..df4b40edda 100644 --- a/hls4ml/model/attributes.py +++ b/hls4ml/model/attributes.py @@ -1,6 +1,6 @@ from collections.abc import MutableMapping -from hls4ml.model.types import InplaceVariable, NamedType, TensorVariable, WeightVariable +from hls4ml.model.types import NamedType, TensorVariable, WeightVariable class Attribute(object): def __init__(self, name, value_type=int, default=None, configurable=False): @@ -57,7 +57,7 @@ def __iter__(self): yield key def __setitem__(self, key, value): - if isinstance(value, (TensorVariable, InplaceVariable)): + if isinstance(value, TensorVariable): self.layer.model.register_output_variable(key, value) self.attributes['result_t'] = value.type if key in self._expected_attributes and key in self.layer.outputs: @@ -98,7 +98,7 @@ def __init__(self, attributes): class VariableMapping(AttributeMapping): def __init__(self, attributes): - super().__init__(attributes, (TensorVariable, InplaceVariable)) + super().__init__(attributes, TensorVariable) def __getitem__(self, key): if 'out_' + key in self.attributes: diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 89be745ff1..4727b7f1d8 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -2,7 +2,7 @@ import six from hls4ml.model.types import NamedType -from hls4ml.model.types import TensorVariable, WeightVariable, CompressedWeightVariable, ExponentWeightVariable, InplaceVariable +from hls4ml.model.types import TensorVariable, WeightVariable, CompressedWeightVariable, ExponentWeightVariable from hls4ml.model.types import IntegerPrecisionType, FixedPrecisionType, ExponentPrecisionType from hls4ml.model.types import find_minimum_width @@ -271,11 +271,7 @@ def initialize(self): shape = shape[1:] dims = ['N_SIZE_{}_{}'.format(i, self.index) for i in range(1, len(shape) + 1)] - out_name = self.outputs[0] - proxy = self.get_input_variable() - out = InplaceVariable(shape, dims, proxy) - - self.set_attr(out_name, out) + self.add_output_variable(shape, dims) class Dense(Layer): _expected_attributes = [ diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index afa579a65c..b26a216db9 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -207,22 +207,6 @@ def size_cpp(self): #TODO get rid of size_cpp() (and dim_names) return '*'.join([str(k) for k in self.dim_names]) -class InplaceVariable(Variable): - def __init__(self, shape, dim_names, proxy): - self.shape = shape - self.dim_names = dim_names - self.type = proxy.type - self.name = proxy.name - self.size = proxy.size - - def get_shape(self): - return zip(self.dim_names, self.shape) - - def size_cpp(self): - return '*'.join([str(k) for k in self.dim_names]) - - def definition_cpp(self, name_suffix='', as_reference=False): - return None class WeightVariable(Variable): def __init__(self, var_name, type_name, precision, data, quantizer=None, **kwargs): From 0b75fd06eb613c91790a9b62b538c04c923b525c Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 11 Apr 2022 18:33:01 -0500 Subject: [PATCH 03/51] fix parallel reshape --- hls4ml/backends/fpga/fpga_types.py | 16 +++++++++++ .../vivado/passes/inplace_parallel_reshape.py | 20 ++++++++++++++ .../vivado/passes/reshaping_templates.py | 27 +------------------ .../backends/vivado/passes/transform_types.py | 7 ++++- hls4ml/backends/vivado/vivado_backend.py | 1 + hls4ml/model/types.py | 6 +++++ 6 files changed, 50 insertions(+), 27 deletions(-) create mode 100644 
hls4ml/backends/vivado/passes/inplace_parallel_reshape.py diff --git a/hls4ml/backends/fpga/fpga_types.py b/hls4ml/backends/fpga/fpga_types.py index 3efbea1592..0bd7a9479c 100644 --- a/hls4ml/backends/fpga/fpga_types.py +++ b/hls4ml/backends/fpga/fpga_types.py @@ -190,6 +190,14 @@ class QuartusArrayVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): return '{type} {name}{suffix}[{shape}] {pragma}'.format(type=self.type.name, name=self.cppname, suffix=name_suffix, shape=self.size_cpp(), pragma=self.pragma) +class VivadoInplaceArrayVariableDefinition(VariableDefinition): + def definition_cpp(self): + return f'auto& {self.cppname} = {self.input_name}' + +class QuartusInplaceArrayVariableDefinition(VariableDefinition): + def definition_cpp(self): + return f'auto& {self.cppname} = {self.input_name}' + class ArrayVariableConverter(object): def __init__(self, type_converter, prefix, definition_cls): self.type_converter = type_converter @@ -214,6 +222,14 @@ class QuartusArrayVariableConverter(ArrayVariableConverter): def __init__(self, type_converter): super().__init__(type_converter=type_converter, prefix='Quartus', definition_cls=QuartusArrayVariableDefinition) +class VivadoInplaceArrayVariableConverter(ArrayVariableConverter): + def __init__(self, type_converter): + super().__init__(type_converter=type_converter, prefix='Vivado', definition_cls=VivadoInplaceArrayVariableDefinition) + +class QuartusInplaceArrayVariableConverter(ArrayVariableConverter): + def __init__(self, type_converter): + super().__init__(type_converter=type_converter, prefix='Quartus', definition_cls=QuartusInplaceArrayVariableDefinition) + #endregion #region StructMemberVariable diff --git a/hls4ml/backends/vivado/passes/inplace_parallel_reshape.py b/hls4ml/backends/vivado/passes/inplace_parallel_reshape.py new file mode 100644 index 0000000000..abbb127090 --- /dev/null +++ b/hls4ml/backends/vivado/passes/inplace_parallel_reshape.py @@ -0,0 +1,20 @@ +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.layers import Reshape +from hls4ml.model.types import InplaceTensorVariable + +class InplaceParallelReshape(OptimizerPass): + """ + Because in io_parallel arrays are stored 1D, reshape produces no code + """ + def match(self, node): + return isinstance(node, Reshape) + + def transform(self, model, node): + if model.config.get_config_value('IOType') != 'io_parallel': + return False + + outvar = node.get_output_variable() + invar = node.get_input_variable(node.inputs[0]) + newoutvar = InplaceTensorVariable(outvar, invar.cppname) + node.set_attr(node.outputs[0], newoutvar) + return False diff --git a/hls4ml/backends/vivado/passes/reshaping_templates.py b/hls4ml/backends/vivado/passes/reshaping_templates.py index c11c349c3f..53c48a5f34 100644 --- a/hls4ml/backends/vivado/passes/reshaping_templates.py +++ b/hls4ml/backends/vivado/passes/reshaping_templates.py @@ -1,5 +1,5 @@ -from hls4ml.model.layers import ZeroPadding1D, ZeroPadding2D, Resize, Transpose, Reshape +from hls4ml.model.layers import ZeroPadding1D, ZeroPadding2D, Resize, Transpose from hls4ml.backends.template import LayerConfigTemplate, FunctionCallTemplate # ZeroPadding templates @@ -125,28 +125,3 @@ def format(self, node): params['dim'] = node.get_attr('dim') return self.template.format(**params) - -# Remaining reshapes only exist in io_parallel. 
-# Given how the data is stored in a 1D array, changing -# the shape does not add any code - -reshape_function_template = 'auto& {output} = {input};' - -class ReshapeConfigTemplate(LayerConfigTemplate): - def __init__(self): - super().__init__(Reshape) - self.template = '' - - def format(self, node): - return self.template - -class ReshapeFucntionTemplate(FunctionCallTemplate): - def __init__(self): - super().__init__(Reshape) - self.template = reshape_function_template - - def format(self, node): - params = {} - params['input'] = node.get_input_variable().name - params['output'] = node.get_output_variable().name - return self.template.format(**params) diff --git a/hls4ml/backends/vivado/passes/transform_types.py b/hls4ml/backends/vivado/passes/transform_types.py index 761d221135..10b0b060fc 100644 --- a/hls4ml/backends/vivado/passes/transform_types.py +++ b/hls4ml/backends/vivado/passes/transform_types.py @@ -1,14 +1,17 @@ +from numpy import isin from hls4ml.model.optimizer import GlobalOptimizerPass from hls4ml.backends.fpga.fpga_types import ( APTypeConverter, HLSTypeConverter, StaticWeightVariableConverter, - VivadoArrayVariableConverter, VivadoStreamVariableConverter) + VivadoArrayVariableConverter, VivadoInplaceArrayVariableConverter, VivadoStreamVariableConverter) +from hls4ml.model.types import InplaceTensorVariable class TransformTypes(GlobalOptimizerPass): def __init__(self): self.type_converter = HLSTypeConverter(precision_converter=APTypeConverter()) self.array_var_converter = VivadoArrayVariableConverter(type_converter=self.type_converter) + self.inplace_array_var_converter = VivadoInplaceArrayVariableConverter(type_converter=self.type_converter) self.stream_var_converter = VivadoStreamVariableConverter(type_converter=self.type_converter) self.weight_var_converter = StaticWeightVariableConverter(type_converter=self.type_converter) @@ -23,6 +26,8 @@ def transform(self, model, node): elif io_type == 'io_parallel': if node.name in node.model.inputs: new_var = self.array_var_converter.convert(var, pragma='reshape') + elif isinstance(var, InplaceTensorVariable): + new_var = self.inplace_array_var_converter.convert(var, pragma='') else: new_var = self.array_var_converter.convert(var, pragma='partition') else: diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index 8f0038a7ce..bc3f220cb0 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -44,6 +44,7 @@ def _register_flows(self): optimization_passes = [ 'vivado:optimize_pointwise_conv', + 'vivado:inplace_parallel_reshape', ] optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name) diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index b26a216db9..90820c4e2e 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -207,6 +207,12 @@ def size_cpp(self): #TODO get rid of size_cpp() (and dim_names) return '*'.join([str(k) for k in self.dim_names]) +class InplaceTensorVariable(TensorVariable): + '''A TensorVariable that is just a link to another''' + def __init__(self, tv, input_name): + '''Always created with a passed in TesorVariable tv and the input_name it should link to''' + self.__dict__.update(tv.__dict__) + self.input_name = input_name class WeightVariable(Variable): def __init__(self, var_name, type_name, precision, data, quantizer=None, **kwargs): From 329cc4cc7f8f78260081459cb3957165897e0661 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 12 Apr 2022 
11:39:21 -0500 Subject: [PATCH 04/51] Fix inplace usage for stream --- hls4ml/backends/fpga/fpga_types.py | 23 ++++++++++-- .../vivado/passes/inplace_parallel_reshape.py | 20 ----------- .../backends/vivado/passes/inplace_reshape.py | 36 +++++++++++++++++++ .../backends/vivado/passes/repack_stream.py | 13 ------- .../backends/vivado/passes/transform_types.py | 9 +++-- hls4ml/backends/vivado/vivado_backend.py | 2 +- hls4ml/model/types.py | 6 ++-- 7 files changed, 68 insertions(+), 41 deletions(-) delete mode 100644 hls4ml/backends/vivado/passes/inplace_parallel_reshape.py create mode 100644 hls4ml/backends/vivado/passes/inplace_reshape.py diff --git a/hls4ml/backends/fpga/fpga_types.py b/hls4ml/backends/fpga/fpga_types.py index 0bd7a9479c..e6bf796c98 100644 --- a/hls4ml/backends/fpga/fpga_types.py +++ b/hls4ml/backends/fpga/fpga_types.py @@ -192,11 +192,11 @@ def definition_cpp(self, name_suffix='', as_reference=False): class VivadoInplaceArrayVariableDefinition(VariableDefinition): def definition_cpp(self): - return f'auto& {self.cppname} = {self.input_name}' + return f'auto& {self.cppname} = {self.input_var.cppname}' class QuartusInplaceArrayVariableDefinition(VariableDefinition): def definition_cpp(self): - return f'auto& {self.cppname} = {self.input_name}' + return f'auto& {self.cppname} = {self.input_var.cppname}' class ArrayVariableConverter(object): def __init__(self, type_converter, prefix, definition_cls): @@ -274,6 +274,10 @@ def definition_cpp(self, name_suffix='', as_reference=False): else: # Declaration return 'hls::stream<{type}> {name}{suffix}("{name}")'.format(type=self.type.name, name=self.cppname, suffix=name_suffix) +class VivadoInplaceStreamVariableDefinition(VariableDefinition): + def definition_cpp(self): + return f'auto& {self.cppname} = {self.input_var.cppname}' + class StreamVariableConverter(object): def __init__(self, type_converter, prefix, definition_cls): self.type_converter = type_converter @@ -292,10 +296,25 @@ def convert(self, tensor_var, n_pack=1, depth=0): tensor_var.__class__ = type(self.prefix + 'StreamVariable', (type(tensor_var), self.definition_cls), {}) return tensor_var +class InplaceStreamVariableConverter(StreamVariableConverter): + def convert(self, tensor_var, n_pack=1, depth=0): + if isinstance(tensor_var, self.definition_cls): # Already converted + return tensor_var + + tensor_var.pragma = None + tensor_var.type = self.type_converter.convert(PackedType(tensor_var.type.name, tensor_var.type.precision, tensor_var.input_var.shape[-1], n_pack)) + + tensor_var.__class__ = type(self.prefix + 'StreamVariable', (type(tensor_var), self.definition_cls), {}) + return tensor_var + class VivadoStreamVariableConverter(StreamVariableConverter): def __init__(self, type_converter): super().__init__(type_converter=type_converter, prefix='Vivado', definition_cls=VivadoStreamVariableDefinition) +class VivadoInplaceStreamVariableConverter(InplaceStreamVariableConverter): + def __init__(self, type_converter): + super().__init__(type_converter=type_converter, prefix='Vivado', definition_cls=VivadoInplaceStreamVariableDefinition) + #endregion #region WeightsVariable diff --git a/hls4ml/backends/vivado/passes/inplace_parallel_reshape.py b/hls4ml/backends/vivado/passes/inplace_parallel_reshape.py deleted file mode 100644 index abbb127090..0000000000 --- a/hls4ml/backends/vivado/passes/inplace_parallel_reshape.py +++ /dev/null @@ -1,20 +0,0 @@ -from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.layers import Reshape -from hls4ml.model.types import 
InplaceTensorVariable - -class InplaceParallelReshape(OptimizerPass): - """ - Because in io_parallel arrays are stored 1D, reshape produces no code - """ - def match(self, node): - return isinstance(node, Reshape) - - def transform(self, model, node): - if model.config.get_config_value('IOType') != 'io_parallel': - return False - - outvar = node.get_output_variable() - invar = node.get_input_variable(node.inputs[0]) - newoutvar = InplaceTensorVariable(outvar, invar.cppname) - node.set_attr(node.outputs[0], newoutvar) - return False diff --git a/hls4ml/backends/vivado/passes/inplace_reshape.py b/hls4ml/backends/vivado/passes/inplace_reshape.py new file mode 100644 index 0000000000..557974141c --- /dev/null +++ b/hls4ml/backends/vivado/passes/inplace_reshape.py @@ -0,0 +1,36 @@ +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.layers import Reshape +from hls4ml.model.types import InplaceTensorVariable + +class InplaceParallelReshape(OptimizerPass): + """ + Because in io_parallel arrays are stored 1D, reshape produces no code + """ + def match(self, node): + return isinstance(node, Reshape) + + def transform(self, model, node): + if model.config.get_config_value('IOType') != 'io_parallel': + return False + + outvar = node.get_output_variable() + invar = node.get_input_variable(node.inputs[0]) + newoutvar = InplaceTensorVariable(outvar, invar) + node.set_attr(node.outputs[0], newoutvar) + return False + +class InplaceStreamFlatten(OptimizerPass): + ''' Remove Flatten layer in io_stream ''' + def match(self, node): + # optimizer pass for a flatten layer (1 output dimension) + return isinstance(node, Reshape) and len(node.get_output_variable().shape) == 1 + + def transform(self, model, node): + if model.config.get_config_value('IOType') != 'io_stream': + return False + + outvar = node.get_output_variable() + invar = node.get_input_variable(node.inputs[0]) + newoutvar = InplaceTensorVariable(outvar, invar) + node.set_attr(node.outputs[0], newoutvar) + return False diff --git a/hls4ml/backends/vivado/passes/repack_stream.py b/hls4ml/backends/vivado/passes/repack_stream.py index 4aee53e188..c5f8d569be 100644 --- a/hls4ml/backends/vivado/passes/repack_stream.py +++ b/hls4ml/backends/vivado/passes/repack_stream.py @@ -80,7 +80,6 @@ def register_repack_stream(backend): # Register the optimization passes backend.register_pass('remove_final_reshape', RemoveFinalReshape) backend.register_pass('reshape_stream', ReshapeStream) - backend.register_pass('eliminate_flatten_stream', EliminateFlattenStream) backend.register_pass('broadcast_stream', BroadcastStream) # Register template passes @@ -108,18 +107,6 @@ def transform(self, model, node): return True -class EliminateFlattenStream(OptimizerPass): - ''' Remove Flatten layer in io_stream ''' - def match(self, node): - # optimizer pass for a flatten layer (1 output dimension) - return isinstance(node, Reshape) and len(node.get_output_variable().shape) == 1 - - def transform(self, model, node): - if model.config.get_config_value('IOType') != 'io_stream': - return False - - model.remove_node(node) - return True class BroadcastStream(OptimizerPass): def match(self, node): diff --git a/hls4ml/backends/vivado/passes/transform_types.py b/hls4ml/backends/vivado/passes/transform_types.py index 10b0b060fc..bea64690ce 100644 --- a/hls4ml/backends/vivado/passes/transform_types.py +++ b/hls4ml/backends/vivado/passes/transform_types.py @@ -3,7 +3,8 @@ from hls4ml.model.optimizer import GlobalOptimizerPass from hls4ml.backends.fpga.fpga_types import ( 
APTypeConverter, HLSTypeConverter, StaticWeightVariableConverter, - VivadoArrayVariableConverter, VivadoInplaceArrayVariableConverter, VivadoStreamVariableConverter) + VivadoArrayVariableConverter, VivadoInplaceArrayVariableConverter, + VivadoStreamVariableConverter, VivadoInplaceStreamVariableConverter) from hls4ml.model.types import InplaceTensorVariable @@ -13,6 +14,7 @@ def __init__(self): self.array_var_converter = VivadoArrayVariableConverter(type_converter=self.type_converter) self.inplace_array_var_converter = VivadoInplaceArrayVariableConverter(type_converter=self.type_converter) self.stream_var_converter = VivadoStreamVariableConverter(type_converter=self.type_converter) + self.inplace_stream_var_converter = VivadoInplaceStreamVariableConverter(type_converter=self.type_converter) self.weight_var_converter = StaticWeightVariableConverter(type_converter=self.type_converter) def transform(self, model, node): @@ -20,7 +22,10 @@ def transform(self, model, node): for out_name, var in node.variables.items(): if io_type == 'io_stream': - new_var = self.stream_var_converter.convert(var) + if isinstance(var, InplaceTensorVariable): + new_var = self.inplace_stream_var_converter.convert(var) + else: + new_var = self.stream_var_converter.convert(var) elif io_type == 'io_serial': new_var = self.array_var_converter.convert(var, pragma='stream') elif io_type == 'io_parallel': diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index bc3f220cb0..8113022da8 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -27,7 +27,6 @@ def _register_flows(self): streaming_passes = [ 'vivado:remove_final_reshape', 'vivado:reshape_stream', - 'vivado:eliminate_flatten_stream', 'vivado:clone_output', 'vivado:insert_zero_padding_before_conv1d', 'vivado:insert_zero_padding_before_conv2d', @@ -45,6 +44,7 @@ def _register_flows(self): optimization_passes = [ 'vivado:optimize_pointwise_conv', 'vivado:inplace_parallel_reshape', + 'vivado:inplace_stream_flatten', ] optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name) diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index 90820c4e2e..69c9423a7d 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -209,10 +209,10 @@ def size_cpp(self): class InplaceTensorVariable(TensorVariable): '''A TensorVariable that is just a link to another''' - def __init__(self, tv, input_name): - '''Always created with a passed in TesorVariable tv and the input_name it should link to''' + def __init__(self, tv, input_var): + '''Always created with a passed in TesorVariable tv and the input_var it should link to''' self.__dict__.update(tv.__dict__) - self.input_name = input_name + self.input_var = input_var class WeightVariable(Variable): def __init__(self, var_name, type_name, precision, data, quantizer=None, **kwargs): From 17846f2e7225940fcc731681d3a82500abf3cad7 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 12 Apr 2022 11:56:26 -0500 Subject: [PATCH 05/51] fix comment spelling, formatting --- hls4ml/backends/vivado/passes/repack_stream.py | 1 - hls4ml/model/types.py | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/hls4ml/backends/vivado/passes/repack_stream.py b/hls4ml/backends/vivado/passes/repack_stream.py index c5f8d569be..80339f4749 100644 --- a/hls4ml/backends/vivado/passes/repack_stream.py +++ b/hls4ml/backends/vivado/passes/repack_stream.py @@ -107,7 +107,6 @@ def transform(self, 
model, node): return True - class BroadcastStream(OptimizerPass): def match(self, node): if isinstance(node, Merge): diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index 69c9423a7d..284009f4f1 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -210,7 +210,10 @@ def size_cpp(self): class InplaceTensorVariable(TensorVariable): '''A TensorVariable that is just a link to another''' def __init__(self, tv, input_var): - '''Always created with a passed in TesorVariable tv and the input_var it should link to''' + ''' + Always created with a passed in TensorVariable tv + and the input_var variable it should link to. + ''' self.__dict__.update(tv.__dict__) self.input_var = input_var From 966d257e4170b05fac8efb8d44615840a6dccd5e Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 19 Apr 2022 16:33:10 -0500 Subject: [PATCH 06/51] snapshot implementing qonnx in new master branch --- hls4ml/converters/onnx/convolution.py | 110 +++--- hls4ml/converters/onnx/core.py | 116 +++--- hls4ml/converters/onnx/merge.py | 35 +- hls4ml/converters/onnx/pooling.py | 85 ++--- hls4ml/converters/onnx/quantizer.py | 92 +++++ hls4ml/converters/onnx/reshape.py | 47 +-- hls4ml/converters/onnx_to_hls.py | 175 ++++----- hls4ml/model/attributes.py | 3 +- hls4ml/model/graph.py | 4 +- hls4ml/model/layers.py | 206 ++++++++-- hls4ml/model/optimizer/__init__.py | 29 +- .../model/optimizer/passes/batchnorm_opt.py | 163 ++++++++ .../model/optimizer/passes/conv_to_convxd.py | 83 +++++ .../optimizer/passes/matmul_const_to_dense.py | 67 ++++ hls4ml/model/optimizer/passes/merge_const.py | 157 ++++++++ hls4ml/model/optimizer/passes/move_scales.py | 291 +++++++++++++++ hls4ml/model/optimizer/passes/nop.py | 28 +- hls4ml/model/optimizer/passes/qkeras.py | 76 ++-- hls4ml/model/optimizer/passes/quant_opt.py | 351 ++++++++++++++++++ .../model/optimizer/passes/reshape_const.py | 23 ++ test/pytest/test_qonnx.py | 105 ++++++ 21 files changed, 1875 insertions(+), 371 deletions(-) create mode 100644 hls4ml/converters/onnx/quantizer.py create mode 100644 hls4ml/model/optimizer/passes/batchnorm_opt.py create mode 100644 hls4ml/model/optimizer/passes/conv_to_convxd.py create mode 100644 hls4ml/model/optimizer/passes/matmul_const_to_dense.py create mode 100644 hls4ml/model/optimizer/passes/merge_const.py create mode 100644 hls4ml/model/optimizer/passes/move_scales.py create mode 100644 hls4ml/model/optimizer/passes/quant_opt.py create mode 100644 hls4ml/model/optimizer/passes/reshape_const.py create mode 100755 test/pytest/test_qonnx.py diff --git a/hls4ml/converters/onnx/convolution.py b/hls4ml/converters/onnx/convolution.py index 665da1cd49..9a8f98b7dd 100644 --- a/hls4ml/converters/onnx/convolution.py +++ b/hls4ml/converters/onnx/convolution.py @@ -1,5 +1,5 @@ -import math -from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute, get_onnx_input_name, compute_pads_1d, compute_pads_2d +import numpy as np +from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute, compute_pads_1d, compute_pads_2d from hls4ml.converters.utils import compute_padding_1d, compute_padding_2d @onnx_handler('Conv') @@ -7,73 +7,65 @@ def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config): layer = {} layer['name'] = node.name - layer['data_format'] = 'channels_first' #ONNX's default is channel first - layer['inputs'] = get_onnx_input_name(node, graph) - reader.add_input(layer['name'], node.input) + if node.domain != 'qonnx.custom_op.channels_last': + raise RuntimeError("Please convert the 
model to channels-last format with qonnx-to-channels-last") + layer['data_format'] = 'channels_last' # QONNX needs to be channels-last. + layer['inputs'] = node.input + layer['outputs'] = node.output + #reader.add_input(layer['name'], node.input) strides = get_onnx_attribute(node, 'strides') kernel_shape = get_onnx_attribute(node, 'kernel_shape') + # Note: currently don't have support for auto_pad. + pads = get_onnx_attribute(node, 'pads') + dilations = get_onnx_attribute(node, 'dilations') + if dilations is None: + dilations = [1]*len(layer['kernel_shape']) - if len(input_shapes[0]) == 3: # Conv1D - layer['class_name'] = 'Conv1D' + if get_onnx_attribute(node, 'group') != 1: + raise ValueError("Only 1 group supported corrently") - layer['in_width']= input_shapes[0][2] - layer['n_chan']=input_shapes[0][1] - layer['filt_width']= kernel_shape[0] - layer['n_filt']= reader.get_weights_data(layer['name'], 'kernel').shape[2] - layer['stride_width'] = strides[0] - pads = compute_pads_1d(node, layer) + layer['n_chan'] = input_shapes[0][-1] + layer['n_filt'] = input_shapes[1][0] + + layer['n_dim'] = len(input_shapes[0]) - 2 # 2 comes from channels and batch dimentions + if layer['n_dim'] not in (1, 2): + raise ValueError("Only 1D and 2D convolutions are supported") + layer['class_name'] = 'Conv' + #set some values needed later + if layer['n_dim'] == 1: + # this is 1D convolution + full_width = input_shapes[0][-2] + pads[0] + pads[1] + eff_kernel_width = kernel_shape[0] * dilations[0] + layer['n_out'] = int(np.ceil((full_width - eff_kernel_width + 1) / strides[0])) + # for compatibility interpret some variables layer['pad_left'] = pads[0] layer['pad_right'] = pads[1] - - if all(x == 0 for x in pads): # No padding, i.e., 'VALID' padding - layer['padding'] = 'valid' - else: - layer['padding'] = 'same' - - (layer['out_width'],_,_) = compute_padding_1d(layer['padding'], - layer['in_width'], - layer['stride_width'], - layer['filt_width']) - - output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_width']] - - elif len(input_shapes[0]) == 4: # Conv2D - - layer['class_name'] = 'Conv2D' - - layer['in_height']=input_shapes[0][2] - layer['in_width']=input_shapes[0][3] - layer['n_chan']=input_shapes[0][1] - - layer['filt_height']=kernel_shape[0] - layer['filt_width']=kernel_shape[1] - - layer['n_filt']=next((x.type.tensor_type.shape.dim[1].dim_value for x in graph.value_info if x.name == node.output[0]), None) - layer['stride_height'] = strides[0] - layer['stride_width'] = strides[1] - pads = compute_pads_2d(node, layer) + layer['filt_width'] = kernel_shape[0] + layer['stride_width'] = strides[0] + layer['dilation_width'] = dilations[0] + else: + # 2d + full_height = input_shapes[0][-3] + pads[0] + pads[2] + eff_kernel_height = kernel_shape[0] * dilations[0] + out_height = int(np.ceil((full_height - eff_kernel_height + 1) / strides[0])) + layer['out_height'] = out_height + full_width = input_shapes[0][-2] + pads[1] + pads[3] + eff_kernel_width = kernel_shape[1] * dilations[1] + out_width = int(np.ceil((full_width - eff_kernel_width + 1) / strides[1])) + layer['out_width'] = out_width + # for compatibility interpret some variables layer['pad_top'] = pads[0] - layer['pad_bottom'] = pads[2] layer['pad_left'] = pads[1] + layer['pad_bottom'] = pads[2] layer['pad_right'] = pads[3] + layer['filt_height'] = kernel_shape[0] + layer['filt_width'] = kernel_shape[1] + layer['stride_height'] = strides[0] + layer['stride_width'] = strides[1] + layer['dilation_height'] = dilations[0] + layer['dilation_width'] = 
dilations[1] - if all(x == 0 for x in pads): # No padding, i.e., 'VALID' padding in Keras/Tensorflow - layer['padding'] = 'valid' - else: #Only 'valid' and 'same' padding are available in Keras - layer['padding'] = 'same' - - (layer['out_height'], layer['out_width'],_,_,_,_) = compute_padding_2d(layer['padding'], - layer['in_height'], - layer['in_width'], - layer['stride_height'], - layer['stride_width'], - layer['filt_height'], - layer['filt_width']) - - output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_height'], layer['out_width']] - - return layer, output_shape - \ No newline at end of file + return layer diff --git a/hls4ml/converters/onnx/core.py b/hls4ml/converters/onnx/core.py index 985d941549..2266e7f964 100644 --- a/hls4ml/converters/onnx/core.py +++ b/hls4ml/converters/onnx/core.py @@ -1,30 +1,40 @@ -from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute, get_onnx_input_name -from hls4ml.converters.utils import compute_padding_1d +import numpy as np +from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute -@onnx_handler(*['Gemm', 'MatMul']) +@onnx_handler(*['Gemm']) def parse_gemm_layer(reader, node, inputs_map, input_shapes, graph, config): - + layer = {} - + layer['class_name'] = 'Dense' layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = node.input + layer['outputs'] = node.output tran_weight = get_onnx_attribute(node, 'transB', 0) reader.add_input(layer['name'], node.input, tran_weight) - weights_shape = reader.get_weights_data(layer['name'], 'kernel').shape + weights_shape = input_shapes[1][:] layer['n_in'] = weights_shape[0] - layer['n_out'] = weights_shape[1] + layer['n_out'] = weights_shape[1] + + return layer + +@onnx_handler('MatMul') +def parse_matmul_layer(reader, node, inputs_map, input_shapes, graph, config): + + layer = {} + + layer['class_name'] = 'MatMul' + layer['name'] = node.name + layer['inputs'] = node.input + layer['outputs'] = node.output - output_shape = input_shapes[0][:] - output_shape[-1] = layer['n_out'] - - return layer, output_shape + return layer #------------------Global paras for activations # TODO: repair HardSigmoid support -# https://github.com/fastmachinelearning/hls4ml/issues/409 +# https://github.com/fastmachinelearning/hls4ml/issues/409 #activation_layers = ['Relu', 'Tanh', 'Sigmoid', 'LeakyRelu', 'ThresholdedRelu', 'HardSigmoid', 'Elu', 'Selu', 'PRelu', 'Softmax', 'Softsign', 'Softplus', 'Clip'] activation_layers = ['Relu', 'Tanh', 'Sigmoid', 'LeakyRelu', 'ThresholdedRelu', 'Elu', 'Selu', 'PRelu', 'Softmax', 'Softsign', 'Softplus', 'Clip'] @@ -37,25 +47,27 @@ def parse_gemm_layer(reader, node, inputs_map, input_shapes, graph, config): @onnx_handler(*activation_layers) def parse_activation_layer(reader, node, inputs_map, input_shapes, graph, config): - + layer = {} - + layer['name'] = node.name layer['class_name'] = activation_map[node.op_type] layer['activation'] = node.op_type.lower() - layer['inputs'] = get_onnx_input_name(node, graph) - + layer['inputs'] = node.input + layer['outputs'] = node.output + if layer['class_name'] != 'Activation': - + if layer['class_name'] == 'Softmax': layer['activation'] = 'softmax' + layer['axis'] = get_onnx_attribute(node, 'axis', -1) elif layer['class_name'] in ['ELU', 'LeakyReLU', 'ThresholdedReLU']: layer['activation'] = layer['class_name'] layer['activ_param'] = get_onnx_attribute(node, 'alpha', 0.01) - + elif layer['class_name'] == 'Clip': - + clip_min_node = [x 
for x in graph.initializer if x.name in node.input] clip_min = clip_min_node[0].float_data[0] @@ -65,38 +77,54 @@ def parse_activation_layer(reader, node, inputs_map, input_shapes, graph, config layer['activation'] = 'ReLU' else: raise Exception('Clip with min != 0 is not supported yet!') - + else: layer['activation'] = layer['class_name'] layer['class_name'] = 'Activation' - - return layer, [shape for shape in input_shapes[0]] - + + return layer + @onnx_handler('BatchNormalization') def parse_batchnorm_layer(reader, node, inputs_map, input_shapes, graph, config): - + layer = {} - - layer['class_name'] = 'BatchNormalization' - layer['data_format'] = 'channels_first' + + layer['class_name'] = 'BatchNormOnnx' layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) - + layer['inputs'] = node.input + layer['outputs'] = node.output + #Other attributes - layer['epsilon'] = get_onnx_attribute(node, 'epsilon') - layer['momentum'] = get_onnx_attribute(node, 'momentum') - - reader.add_input(layer['name'], node.input) - - in_size = 1 - for dim in input_shapes[0][1:]: - in_size *= dim - - layer['n_in'] = layer['n_out'] = in_size - + layer['epsilon'] = get_onnx_attribute(node, 'epsilon', 1e-05) + # layer['momentum'] = get_onnx_attribute(node, 'momentum', 0.9) # not used + + layer['n_in'] = layer['n_out'] = np.prod(input_shapes[0][1:]) + if len(input_shapes[0]) == 2: layer['n_filt'] = -1 elif len(input_shapes[0]) > 2: - layer['n_filt']= input_shapes[0][1] #Always channel first for onnx - - return layer, [shape for shape in input_shapes[0]] \ No newline at end of file + if node.domain != 'qonnx.custom_op.channels_last': + raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last") + layer['data_format'] = 'channels_last' # QONNX needs to be channels-last. 
+ layer['n_filt']= input_shapes[0][-1] + else: + raise RuntimeError(f"Unexpected input shape: {input_shapes[0]}") + + return layer + +@onnx_handler('Quant') +def parse_quant_layer(reader, node, inputs_map, input_shapes, graph, config): + + layer = {} + + layer['class_name'] = 'Quant' + layer['name'] = node.name + layer['inputs'] = node.input + layer['outputs'] = node.output + + #Other attributes + layer['narrow'] = bool(get_onnx_attribute(node, 'narrow')) + layer['rounding_mode'] = get_onnx_attribute(node, 'rounding_mode') + layer['signed'] = bool(get_onnx_attribute(node, 'signed')) + + return layer diff --git a/hls4ml/converters/onnx/merge.py b/hls4ml/converters/onnx/merge.py index 1ba957dc07..4c83b1b06d 100644 --- a/hls4ml/converters/onnx/merge.py +++ b/hls4ml/converters/onnx/merge.py @@ -1,15 +1,15 @@ -from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute, get_onnx_input_name +from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute -merge_layers = ['Add', 'Sub', 'Mul', 'Average', 'Max', 'Min', 'Concat', 'Sum'] +merge_layers = ['Add', 'Sub', 'Mul', 'Div', 'Average', 'Max', 'Min', 'Concat', 'Sum'] @onnx_handler(*merge_layers) def parse_merge_layer(reader, node, inputs_map, input_shapes, graph, config): - + layer = {} layer['class_name'] = node.op_type layer['name'] = node.name layer['op'] = layer['class_name'].lower() - layer['inputs'] = get_onnx_input_name(node, graph) - output_shape = input_shapes[0] + layer['inputs'] = node.input + layer['outputs'] = node.output if layer['class_name'] == 'Concat': rank = len(input_shapes[0][1:]) @@ -20,20 +20,29 @@ def parse_merge_layer(reader, node, inputs_map, input_shapes, graph, config): layer['op'] = layer['class_name'].lower() + '{}d'.format(rank) layer['axis'] = get_onnx_attribute(node, 'axis') - #Calculate output shape - new_dim = sum([x.type.tensor_type.shape.dim[layer['axis']].dim_value for x in graph.value_info if x.name in node.input]) - output_shape[layer['axis']] = new_dim - + # #Calculate output shape + # new_dim = sum([x.type.tensor_type.shape.dim[layer['axis']].dim_value for x in graph.value_info if x.name in node.input]) + # output_shape[layer['axis']] = new_dim + elif layer['class_name'] == 'Add': #Check if the layer is an AddBias for input in node.input: + # I think we don't use BiasAdd in ONNX currently if "bias" in input: layer['class_name'] = 'BiasAdd' - reader.add_input(layer['name'], node.input) + # # Should the line below really be replaced with the one below it? 
+ # # Going to assume so + # reader.add_input(layer['name'], node.input) + reader.add_input(layer['name'], input) + + if layer['class_name'] == 'Add': + # If it wasn't changed, just make it a merge node + layer['class_name'] = 'Merge' + else: layer['class_name'] = 'Merge' - + if len(layer['inputs']) > 2: raise Exception('ERROR: Merging more than two tensors is not yet supported.') - - return layer, output_shape \ No newline at end of file + + return layer \ No newline at end of file diff --git a/hls4ml/converters/onnx/pooling.py b/hls4ml/converters/onnx/pooling.py index fe74e32bf9..594614fcc0 100644 --- a/hls4ml/converters/onnx/pooling.py +++ b/hls4ml/converters/onnx/pooling.py @@ -1,21 +1,30 @@ -import math -from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute, compute_pads_1d, compute_pads_2d, get_onnx_input_name +import numpy as np +from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute, compute_pads_1d, compute_pads_2d from hls4ml.converters.utils import compute_padding_1d, compute_padding_2d pool_operations = ['AveragePool', 'MaxPool'] @onnx_handler(*pool_operations) def parse_pool_layer(reader, node, inputs_map, input_shapes, graph, config): - + layer = {} layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = node.input + layer['outputs'] = node.output + if node.domain != 'qonnx.custom_op.channels_last': + raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last") layer['class_name'] = node.op_type - layer['data_format'] = 'channels_first' #Default ONNX + layer['data_format'] = 'channels_last' #Default QONNX info = layer['class_name'].replace('Pool', '') strides = get_onnx_attribute(node, 'strides') kernel_shape = get_onnx_attribute(node, 'kernel_shape') - + pads = get_onnx_attribute(node, 'pads') + layer['pads'] = pads + dilations = get_onnx_attribute(node, 'dilations') + if dilations is None: + dilations = [1]*len(kernel_shape) + layer['dilations'] = dilations + if len(input_shapes[0]) == 3: # 1D layer['class_name'] = info + 'Pooling1D' @@ -24,58 +33,35 @@ def parse_pool_layer(reader, node, inputs_map, input_shapes, graph, config): layer['pool_width'] = kernel_shape[0] layer['stride_width'] = strides[0] - - #Padding - pads = compute_pads_1d(node, layer) - layer['pad_left'] = pads[0] - layer['pad_right'] = pads[1] - - if all(x == 0 for x in pads): # No padding, i.e., 'VALID' padding - layer['padding'] = 'valid' - else: - layer['padding'] = 'same' - - (layer['n_out'],_,_) = compute_padding_1d(layer['padding'], - layer['n_in'], - layer['stride_width'], - layer['pool_width']) - - output_shape = [input_shapes[0][0], layer['n_filt'], layer['n_out']] - + + # formula from ONNX Operators.md documentation + layer['n_out'] = int(np.floor((layer['n_in'] + np.sum(pads) - ((kernel_shape[0] - 1) * dilations[0] + 1)) / strides[0] + 1)) + + elif len(input_shapes[0]) == 4: # 2D layer['class_name'] = info + 'Pooling2D' - layer['n_filt'] = input_shapes[0][1] - layer['in_height'] = input_shapes[0][2] - layer['in_width'] = input_shapes[0][3] + layer['n_filt'] = input_shapes[0][3] + layer['in_height'] = input_shapes[0][1] + layer['in_width'] = input_shapes[0][2] layer['stride_height'] = strides[0] layer['stride_width'] = strides[1] layer['pool_height'] = layer['filt_height'] = kernel_shape[0] layer['pool_width'] = layer['filt_width'] = kernel_shape[1] - - pads = compute_pads_2d(node, layer) + layer['pad_top'] = pads[0] layer['pad_bottom'] = pads[2] layer['pad_left'] = pads[1] 
layer['pad_right'] = pads[3] - if all(x == 0 for x in pads): # No padding, i.e., 'VALID' padding in Keras/Tensorflow - layer['padding'] = 'valid' - else: #Only 'valid' and 'same' padding are available in Keras - layer['padding'] = 'same' - - (layer['out_height'], layer['out_width'],_,_,_,_) = compute_padding_2d(layer['padding'], - layer['in_height'], - layer['in_width'], - layer['stride_height'], - layer['stride_width'], - layer['filt_height'], - layer['filt_width']) - - output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_height'], layer['out_width']] - - return layer, output_shape + # formula from ONNX Operators.md documentation + layer['out_height'] = int(np.floor((layer['in_height'] + pads[0] + pads[2] - ((kernel_shape[0] - 1) * dilations[0] + 1)) + / strides[0] + 1)) + layer['out_width'] = int(np.floor((layer['in_width'] + pads[1] + pads[3] - ((kernel_shape[1] - 1) * dilations[1] + 1)) + / strides[1] + 1)) + + return layer global_pooling_layers = ['GlobalMaxPool', 'GlobalAveragePool'] @onnx_handler(*global_pooling_layers) @@ -83,7 +69,8 @@ def parse_global_pooling_layer(reader, node, inputs_map, input_shapes, graph, co layer = {} layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = node.input + layer['outputs'] = node.output layer['class_name'] = node.op_type layer['data_format'] = 'channels_first' @@ -99,14 +86,12 @@ def parse_global_pooling_layer(reader, node, inputs_map, input_shapes, graph, co layer['n_in'] = input_shapes[0][2] layer['n_filt'] = input_shapes[0][1] - + elif len(input_shapes[0]) == 4: layer['class_name'] = info + 'Pooling2D' layer['n_filt'] = input_shapes[0][1] layer['in_height'] = input_shapes[0][2] layer['in_width'] = input_shapes[0][3] - - output_shape = [input_shapes[0][0], layer['n_filt']] + [1]*(len(input_shapes[0]) - 2) - return layer, output_shape \ No newline at end of file + return layer \ No newline at end of file diff --git a/hls4ml/converters/onnx/quantizer.py b/hls4ml/converters/onnx/quantizer.py new file mode 100644 index 0000000000..a078aae07f --- /dev/null +++ b/hls4ml/converters/onnx/quantizer.py @@ -0,0 +1,92 @@ +""" +Quantizer for the Quant node, after scale and zeropoint hafe been extracted. +(Thus at this point they are 1 and 0.) + +This is based on the sample implementation in finn-base +""" + +import numpy as np +from hls4ml.model.types import Quantizer + +class QuantNodeQuantizer(Quantizer): + """ This implements a quantizer for a FixedPrecisionType with width==integer""" + def __init__(self, precision): + assert(precision.width == precision.integer) + super().__init__(precision.width, precision) + + def __call__(self, data): + """ Apply the quantization on the data """ + # Clamping + min_int_val = self._min_int(self.hls_type.signed, self.hls_type.saturation_mode, self.bits) + max_int_val = self._max_int(self.hls_type.signed, self.bits) + data = np.where(data > max_int_val, max_int_val, data) + data = np.where(data < min_int_val, min_int_val, data) + # Rounding + rounding_fx = self._resolve_rounding_mode(self.hls_type.rounding_mode) + return rounding_fx(data) + + + @staticmethod + def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: + """Compute the minimum integer representable by a given number of bits. + Args: + signed (bool): Indicates whether the represented integer is signed or not. + saturation_mode (bool): Indicates the saturation mode used (AP_SAT_SYM or AP_SAT) + bit_width (int): Number of bits available for the representation. 
+ Returns: + int: Maximum unsigned integer that can be represented according to + the input arguments. + Examples: + >>> min_int(signed=True, saturation_mode='AP_SAT_SYM', bit_width=8) + int(-127) + >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) + int(0) + >>> min_int(signed=True, saturation_mode='AP_SAT', bit_width=8) + int(-128) + >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) + int(0) + """ + if saturation_mode not in ("AP_SAT_SYM", "AP_SAT"): + raise ValueError(f"Saturation mode {saturation_mode} not supported. Only AP_SAT_SYM, AP_SAT supported") + if signed and saturation_mode == "AP_SAT_SYM": + value = -(2 ** (bit_width - 1)) + 1 + elif signed: + value = -(2 ** (bit_width - 1)) + else: + value = 0 + return value + + @staticmethod + def _max_int(signed: bool, bit_width: int) -> int: + """Compute the maximum integer representable by a given number of bits. + (Note, narrow and unsigned is not supported by the implementation, so saturation mode is not used) + Args: + signed (bool): Indicates whether the represented integer is signed or not. + bit_width (int): Number of bits available for the representation. + Returns: + Tensor: Maximum integer that can be represented according to + the input arguments. + Examples: + >>> max_int(signed=True, bit_width=8) + int(127) + >>> max_int(signed=False, bit_width=8) + int(255) + """ + if not signed: + value = (2 ** bit_width) - 1 + else: + value = (2 ** (bit_width - 1)) - 1 + return value + + @staticmethod + def _resolve_rounding_mode(mode_string): + """Resolve the rounding mode string of Quant and Trunc ops + to the corresponding numpy functions.""" + if mode_string == "AP_RND_CONV": + return np.round + # elif mode_string == "CEIL": # not supported + # return np.ceil + elif mode_string == "AP_TRN": + return np.floor + else: + raise ValueError(f"Could not resolve rounding mode called: {mode_string}") diff --git a/hls4ml/converters/onnx/reshape.py b/hls4ml/converters/onnx/reshape.py index aeee90c3bd..8f5127538f 100644 --- a/hls4ml/converters/onnx/reshape.py +++ b/hls4ml/converters/onnx/reshape.py @@ -1,20 +1,19 @@ -from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_input_name, get_onnx_attribute +from hls4ml.converters.onnx_to_hls import onnx_handler import numpy as np @onnx_handler('Transpose') def parse_transpose_layer(reader, node, inputs_map, input_shapes, graph, config): - + layer = {} layer['name'] = node.name layer['class_name'] = 'Transpose' - layer['inputs'] = get_onnx_input_name(node, graph) - - perm = [list(i.ints) for i in node.attribute][0] #This will get something like [[a,b,c]][0] = [a,b,c] + layer['inputs'] = node.input + layer['outputs'] = node.output + + perm = [list(i.ints) for i in node.attribute][0] #This will get something like [[a,b,c]][0] = [a,b,c] layer['perm'] = [x - 1 for x in perm[1:]] #Ignore the batch dimension in ONNX, and adjust the perm indexing - - output_shape = [input_shapes[0][i] for i in perm] - - return layer, output_shape + + return layer @onnx_handler('Reshape') def parse_reshape_layer(reader, node, inputs_map, input_shapes, graph, config): @@ -22,17 +21,19 @@ def parse_reshape_layer(reader, node, inputs_map, input_shapes, graph, config): layer = {} layer['name'] = node.name layer['class_name'] = 'Reshape' - layer['inputs'] = get_onnx_input_name(node, graph) - - target_shape = list([x for x in graph.initializer if x.name == node.input[1]][0].int64_data)[1:] - - if -1 in target_shape: #Need to infer shape for -1 - print("WARNING: Inferring -1 shape ... 
") - dummy_x = np.ones(input_shapes[0][1:]) - dummy_y = np.reshape(dummy_x, target_shape) - target_shape = list(dummy_y.shape) - - layer['target_shape'] = target_shape - output_shape = input_shapes[0][:1] + layer['target_shape'] - - return layer, output_shape \ No newline at end of file + layer['inputs'] = node.input + layer['outputs'] = node.output + + return layer + +@onnx_handler('Flatten') +def parse_flatten_layer(reader, node, inputs_map, input_shapes, graph, config): + + layer = {} + layer['name'] = node.name + layer['class_name'] = 'Reshape' + layer['inputs'] = node.input + layer['outputs'] = node.output + layer['target_shape'] = [-1] + + return layer \ No newline at end of file diff --git a/hls4ml/converters/onnx_to_hls.py b/hls4ml/converters/onnx_to_hls.py index 083b415502..91da4d9d4d 100644 --- a/hls4ml/converters/onnx_to_hls.py +++ b/hls4ml/converters/onnx_to_hls.py @@ -1,8 +1,5 @@ -from __future__ import print_function -from sys import path_importer_cache import numpy as np -import math -from onnx import ModelProto, GraphProto, NodeProto, TensorProto +import onnx from onnx import helper, numpy_helper, shape_inference from hls4ml.model import ModelGraph @@ -29,7 +26,7 @@ def __init__(self, model): def get_weights_data(self, layer_name, var_name): """Extract weights data from ONNX model. - + Parameters ---------- layer_name : string @@ -40,15 +37,15 @@ def get_weights_data(self, layer_name, var_name): Returns ------- data : numpy array - extracted weights data - + extracted weights data + """ #Get the node associated with the layer name node = next((node for node in self.model.graph.node if node.name == layer_name)) - + inputs = self.input_map[layer_name] inp_idx = self.index_map[var_name] - + if inp_idx >= len(inputs['inputs']): # Check if the layer is an AddBias layer if (node.op_type == 'Add') and (var_name == 'bias'): @@ -58,7 +55,7 @@ def get_weights_data(self, layer_name, var_name): return None tensor = next((x for x in self.model.graph.initializer if x.name == inputs['inputs'][inp_idx]), None) - + if tensor is not None: data = numpy_helper.to_array(tensor) @@ -68,23 +65,23 @@ def get_weights_data(self, layer_name, var_name): data = data.transpose(inputs['perm']) else: data = data.transpose() - + #Check for transB in Gemm if node.op_type == 'Gemm': if not get_onnx_attribute(node, 'transB'): data = data.transpose() return data - + def add_input(self, layer_name, inputs, transpose=True, perm=None): self.input_map[layer_name] = { 'inputs': inputs, 'transpose': transpose, 'perm': perm } - + ####----------------------Helpers---------------------###### def sanitize_layer_name(layer): new_name = layer['name'] if new_name[0].isdigit(): new_name = layer['class_name'].lower() + new_name - + layer['name'] = new_name def replace_char_inconsitency(name): @@ -103,9 +100,24 @@ def get_onnx_attribute(operation, name, default=None): value = value.decode() return value -def get_input_shape(model, operation, input_idx=0): - value_info_idx = next((i for i, x in enumerate(model.graph.value_info) if x.name == operation.input[input_idx]), 0) - return [d.dim_value for d in model.graph.value_info[value_info_idx].type.tensor_type.shape.dim] +def get_input_shape(graph, operation, input_idx=None): + """ Return the input shapes of the model. 
If input_dx is not specified, then the full array is returned + """ + if input_idx is None: + rv = [] + for inp in operation.input: + value_info_idx = next((i for i, x in enumerate(graph.value_info) if x.name == inp), 0) + dim = [d.dim_value for d in graph.value_info[value_info_idx].type.tensor_type.shape.dim] + if dim: + rv.append(dim) + return rv + else: + value_info_idx = next((i for i, x in enumerate(graph.value_info) if x.name == operation.input[input_idx]), 0) + return [d.dim_value for d in graph.value_info[value_info_idx].type.tensor_type.shape.dim] + +def get_constant_value(graph, constant_name): + tensor = next((x for x in graph.initializer if x.name == constant_name), None) + return numpy_helper.to_array(tensor) def compute_pads_1d(operation, layer): auto_pad = get_onnx_attribute(operation, 'auto_pad', 'NOTSET') @@ -125,7 +137,7 @@ def compute_pads_1d(operation, layer): pads = [0, 0] else: pads = get_onnx_attribute(operation, 'pads', [0, 0]) - + return pads def compute_pads_2d(operation, layer): @@ -153,7 +165,7 @@ def compute_pads_2d(operation, layer): pads = [0, 0, 0, 0] else: pads = get_onnx_attribute(operation, 'pads', [0, 0, 0, 0]) - + return pads ####----------------------Layer handling---------------------###### @@ -174,26 +186,6 @@ def decorator(function): return function return decorator -#--->> A set of functions to address the naming convetion in ONNx's graph -def get_onnx_input_name(node, graph): - """ - In ONNX, when calling node.input, it returns the node input's index in the graph instead of the input's name. - However, the input's name is used for indexing in ModelGraph's graph. This function return the input node's name instead. - """ - - in_node = [in_node for in_node in graph.node if (in_node.output[0] in node.input)] - - if in_node: - if in_node[0].op_type != 'Flatten': - input_node_name = [x.name for x in in_node] - else: #IF it's a flatten - input_node_name = [x.name for x in graph.node if (x.output[0] in in_node[0].input)] - - return input_node_name - - else: #If there is no input name it's actually the first layer - return [replace_char_inconsitency(node.input[0])] - def get_out_layer_name(graph): """ Get the output layer's name for the model. @@ -205,16 +197,16 @@ def get_out_layer_name(graph): def onnx_to_hls(config): """ Convert onnx model to hls model from configuration. - + Parameters ---------- config: dict onnx configuration from yaml file or passed through API. - + Returns ------- - ModelGraph : hls4ml model object - + hls_model : hls4ml model object + """ #This is a list of dictionaries to hold all the layer info we need to generate HLS @@ -222,89 +214,82 @@ def onnx_to_hls(config): #Extract model architecture print('Interpreting Model ...') - + model = onnx.load(config['OnnxModel']) if isinstance(config['OnnxModel'], str) else config['OnnxModel'] - - model = shape_inference.infer_shapes(model) - graph = model.graph - + + # # We don't infer the shapes because the QONNX preprocessing does it. 
We may want to add it back, + # # however, if we want to support non-preprocessed ONNX + # model = shape_inference.infer_shapes(model) + reader = ONNXDataReader(model) - + #Obtain list of input/ouput layers all_inputs = [x.name for x in model.graph.input] all_initializers = [x.name for x in model.graph.initializer] input_layers = [x for x in all_inputs if x not in all_initializers] - output_layers = get_out_layer_name(graph) - + constant_layers = all_initializers # no need to copy it even though we change it + output_layers = get_out_layer_name(model.graph) + print("Output layers: ", output_layers) - + for i, inp in enumerate(input_layers): input_layer = {} input_layer['name'] = replace_char_inconsitency(inp) input_layer['class_name'] = 'InputLayer' inp_shape = next((x.type.tensor_type.shape.dim for x in model.graph.input if x.name == inp), None) input_layer['input_shape'] = [x.dim_value for x in inp_shape] - + if len(input_layer['input_shape']) > 1: - input_layer['input_shape'][0] = None #Firt dim is batch + input_layer['input_shape'][0] = None #First dim is batch + print('Input shape:', input_layer['input_shape']) #Clean the layer name for specific models sanitize_layer_name(input_layer) input_layers[i] = input_layer['name'] layer_list.append(input_layer) + for i, constant in enumerate(constant_layers): + constant_layer = {} + constant_layer['name'] = replace_char_inconsitency(constant) + constant_layer['class_name'] = 'Constant' + constant_layer['value'] = get_constant_value(model.graph, constant) + + #Clean the layer name for specific models + sanitize_layer_name(constant_layer) + constant_layers[i] = constant_layer['name'] + + layer_list.append(constant_layer) + # Defined supported layers and check for unsupported layer type - skip_layers = ['Dropout', 'Identity', 'Flatten'] - + skip_layers = ['Dropout', 'Identity'] + #Map inputs of skipped layers inputs_map = {} - + supported_layers = get_supported_onnx_layers() + skip_layers - - # Get input shape - current_shape = [input_layer['input_shape']] - print('Input shape:', current_shape[0]) - - #Loop through layers - layer_counter = 0 - - #Output shape tracking - output_shapes = {} - output_shape = None print('Topology:') - for node in graph.node: - + for node in model.graph.node: + if node.op_type not in supported_layers: raise Exception('ERROR: Unsupported operation type: {}'.format(node.op_type)) - - #If not the first layer then input shape is taken from last layer's output - if layer_counter != 0: - current_shape = [output_shape] - + + current_shape = get_input_shape(model.graph, node) + if node.op_type in skip_layers: - if node.op_type == 'Flatten': - output_shape = [current_shape[0][0], np.prod(current_shape[0][1:])] - - else: - #Currently supported skipped layers have only one input and output - #Skipped layers can follow each other (e.g., Dropout -> Flatten) - - #Mapping inputs - input_name = inputs_map.get(node.input[0], node.input[0]) - output_name = node.output[0] - inputs_map[output_name] = input_name - - output_shape = current_shape[0] - continue - - if node.op_type in supported_layers: - layer_counter = layer_counter + 1 - + #Currently supported skipped layers have only one input and output + #Skipped layers can follow each other (e.g., Dropout -> Flatten) + + #Mapping inputs + input_name = inputs_map.get(node.input[0], node.input[0]) + output_name = node.output[0] + inputs_map[output_name] = input_name + continue + #Process the layer - layer, output_shape = layer_handlers[node.op_type](reader, node, inputs_map, 
current_shape, graph, config) - + layer = layer_handlers[node.op_type](reader, node, inputs_map, current_shape, model.graph, config) + sanitize_layer_name(layer) print('Layer name: {}, layer type: {}, current shape: {}'.format(layer['name'], layer['class_name'], current_shape)) layer_list.append(layer) diff --git a/hls4ml/model/attributes.py b/hls4ml/model/attributes.py index df4b40edda..cd54729f8e 100644 --- a/hls4ml/model/attributes.py +++ b/hls4ml/model/attributes.py @@ -1,9 +1,10 @@ from collections.abc import MutableMapping +from numbers import Integral from hls4ml.model.types import NamedType, TensorVariable, WeightVariable class Attribute(object): - def __init__(self, name, value_type=int, default=None, configurable=False): + def __init__(self, name, value_type=Integral, default=None, configurable=False): self.name = name self.value_type = value_type self.default = default diff --git a/hls4ml/model/graph.py b/hls4ml/model/graph.py index 3f4b13ff35..9555e528af 100644 --- a/hls4ml/model/graph.py +++ b/hls4ml/model/graph.py @@ -451,7 +451,9 @@ def remove_node(self, node, rewire=True): """ if rewire: - if len(node.inputs) > 1 or len(node.outputs) > 1: + inputs = [inp for inp in node.inputs if inp] + outputs = [outp for outp in node.outputs if outp] + if len(inputs) > 1 or len(outputs) > 1: raise Exception('Cannot rewire a node with multiple inputs/outputs') prev_node = self.graph.get(node.inputs[0]) next_node = next((x for x in self.graph.values() if node.outputs[0] in x.inputs), None) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 4727b7f1d8..7328e00f2b 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -1,5 +1,6 @@ import numpy as np import six +import typing from hls4ml.model.types import NamedType from hls4ml.model.types import TensorVariable, WeightVariable, CompressedWeightVariable, ExponentWeightVariable @@ -13,7 +14,7 @@ class classproperty(object): def __init__(self, func): self.func = func - + def __get__(self, obj, owner): return self.func(owner) @@ -93,7 +94,7 @@ def _validate_attributes(self): all_attributes = {} for attr in self.expected_attributes: all_attributes[attr.name] = attr - + # Validate existing attributes for attr_name, attr_value in self.attributes.items(): exp_attr = all_attributes.pop(attr_name, None) @@ -103,7 +104,7 @@ def _validate_attributes(self): .format(attr_name, self.name, self.class_name, exp_attr.value_type, type(attr_value), attr_value)) else: pass # TODO layer contains attribute that is not expected. we can log this for debugging - + # If any expected attributes remain, try adding their default values for attr_name, attr in all_attributes.items(): if attr.default is not None: @@ -264,15 +265,76 @@ def initialize(self): precision = self.attributes.get('precision', None) self.add_output_variable(shape, dims, var_name=self.name, type_name=type_name, precision=precision) +class Constant(Layer): + _expected_attributes = [ + Attribute('value', value_type=np.ndarray), + ] + + def initialize(self): + value = self.attributes['value'] + # the weight variable seems to not be used, so no need to make it. 
+ # self.add_weights_variable(name='value', data=value, precision=self.get_attr("quant_precision"), quantizer=self.get_attr("quantizer")) + self.value = value # note, this is unquantized; Only here for easier access + shape = value.shape + if not shape: + shape = (1,) + self.value = np.array([self.value]) + dims = [f'{self.name}_{i}' for i in range(len(shape))] + self.add_output_variable(shape, dims, var_name=self.name, precision=self.get_attr("precision")) + +class Quant(Layer): # The QONNX quantization layer + """ + This is a QONNX quantization layer. Optimizations should convert it + before HLS is produced. + """ + _expected_attributes = [ + Attribute('narrow', value_type=bool), + Attribute('rounding_mode', value_type=str), + Attribute('signed', value_type=bool) + ] + + def initialize(self): + inp = self.get_input_variable(self.inputs[0]) + shape = inp.shape + dims = inp.dim_names + self.add_output_variable(shape, dims) + class Reshape(Layer): + _expected_attributes = [ + Attribute('target_shape', value_type=typing.Sequence), + ] def initialize(self): - shape = self.attributes['target_shape'] - if shape[0] is None: - shape = shape[1:] - dims = ['N_SIZE_{}_{}'.format(i, self.index) for i in range(1, len(shape) + 1)] + input_shape = self.get_input_variable(self.inputs[0]).shape + target_shape = self.get_attr('target_shape') + if target_shape is None: + # need to get it from the input + + shape_node = self.get_input_node(self.inputs[1]) + target_shape = shape_node.value + + # remove Nones or leading ones + if target_shape[0] is None or (len(target_shape) > 1 and target_shape[0] == 1): + # the latter case is for QONNX + target_shape = target_shape[1:] + # take care of -1 shapes + shape = self.infer_shape(input_shape, target_shape) + + #update the target shape with chnges from above + self.set_attr('target_shape', shape) + + dims = ['N_SIZE_{}_{}'.format(i, self.index) for i in range(len(shape))] self.add_output_variable(shape, dims) + @staticmethod + def infer_shape(input_shape, target_shape): + """This infers -1 shapes""" + if -1 in target_shape: #Need to infer shape for -1 + dummy_x = np.ones(input_shape) + dummy_y = np.reshape(dummy_x, target_shape) + target_shape = list(dummy_y.shape) + return target_shape + class Dense(Layer): _expected_attributes = [ Attribute('n_in'), @@ -296,6 +358,25 @@ def initialize(self): self.add_weights(quantizer=self.get_attr('weight_quantizer'), compression=self.model.config.get_compression(self)) self.add_bias(quantizer=self.get_attr('bias_quantizer')) +class Conv(Layer): + """ + This is for the ONNX Conv node. Currently, it is only supported as an intermediate + form that gets converted to an explicit ConvXD. + + Note: these are always channels-last. 
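For intuition, the out_height / out_width attributes this transient Conv node relies on are computed by the converter from the standard ONNX output-size formula quoted earlier ("formula from ONNX Operators.md documentation"). A minimal stand-alone sketch of that arithmetic, assuming only the math module (the helper name is illustrative, not part of hls4ml):

    import math

    def onnx_conv_output_size(in_size, kernel, stride, pad_begin, pad_end, dilation=1):
        # effective kernel extent once dilation is applied
        effective_kernel = (kernel - 1) * dilation + 1
        return int(math.floor((in_size + pad_begin + pad_end - effective_kernel) / stride + 1))

    # 32-wide input, 3-wide kernel, stride 1, one pixel of padding on each side -> size preserved
    assert onnx_conv_output_size(32, 3, 1, 1, 1) == 32
    # stride 2, no padding
    assert onnx_conv_output_size(32, 3, 2, 0, 0) == 15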
+ """ + def initialize(self): + # use negative indexing because it is not clear if batch dimension is always stripped + if self.attributes['n_dim'] == 1: + # this is 1D convolution + shape = [self.attributes['n_out'], self.attributes['n_filt']] + dims = ['N_OUTPUTS_{}'.format(self.index), 'N_FILT_{}'.format(self.index)] + else: + shape = [self.attributes['out_height'], self.attributes['out_width'], self.attributes['n_filt']] + dims = ['OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index), 'N_FILT_{}'.format(self.index)] + + self.add_output_variable(shape, dims) + class Conv1D(Layer): _expected_attributes = [ Attribute('in_width'), @@ -342,7 +423,7 @@ class SeparableConv1D(Layer): Attribute('pad_left'), Attribute('pad_right'), - + WeightAttribute('depthwise'), WeightAttribute('pointwise'), WeightAttribute('bias'), @@ -360,13 +441,13 @@ def initialize(self): shape = [self.attributes['n_filt'], self.attributes['out_width']] dims = ['N_FILT_{}'.format(self.index), 'N_OUTPUTS_{}'.format(self.index)] self.add_output_variable(shape, dims) - + depthwise_data = self.model.get_weights_data(self.name, 'depthwise_kernel') pointwise_data = self.model.get_weights_data(self.name, 'pointwise_kernel') self.add_weights_variable(name='depthwise', var_name='d{index}', data=depthwise_data, quantizer=self.get_attr('depthwise_quantizer')) self.add_weights_variable(name='pointwise', var_name='p{index}', data=pointwise_data, quantizer=self.get_attr('pointwise_quantizer')) - + zero_bias_data = np.zeros((self.attributes['n_chan'],)) self.add_weights_variable(name='zero_bias', var_name='z{index}', data=zero_bias_data) @@ -476,7 +557,7 @@ class SeparableConv2D(Layer): Attribute('pad_bottom'), Attribute('pad_left'), Attribute('pad_right'), - + WeightAttribute('depthwise'), WeightAttribute('pointwise'), WeightAttribute('bias'), @@ -494,13 +575,13 @@ def initialize(self): shape = [self.attributes['n_filt'], self.attributes['out_height'], self.attributes['out_width']] dims = ['N_FILT_{}'.format(self.index), 'OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index)] self.add_output_variable(shape, dims) - + depthwise_data = self.model.get_weights_data(self.name, 'depthwise_kernel') pointwise_data = self.model.get_weights_data(self.name, 'pointwise_kernel') self.add_weights_variable(name='depthwise', var_name='d{index}', data=depthwise_data, quantizer=self.get_attr('depthwise_quantizer')) self.add_weights_variable(name='pointwise', var_name='p{index}', data=pointwise_data, quantizer=self.get_attr('pointwise_quantizer')) - + zero_bias_data = np.zeros((self.attributes['n_chan'],)) self.add_weights_variable(name='zero_bias', var_name='z{index}', data=zero_bias_data) @@ -661,7 +742,7 @@ class Activation(Layer): Attribute('n_in'), Attribute('activation', value_type=str), #Attribute('table_size', default=1024), - + #TypeAttribute('table') ] @@ -699,6 +780,17 @@ class TernaryTanh(Activation): def initialize(self): super(TernaryTanh, self).initialize() +class BatchNormOnnx(Layer): + ''' + A transient layer formed from ONNX BatchNormalization that gets converted to + BatchNormalization after the scale and bias are determined + ''' + def initialize(self): + inp = self.get_input_variable() + shape = inp.shape + dims = inp.dim_names + self.add_output_variable(shape, dims) + class BatchNormalization(Layer): _expected_attributes = [ Attribute('n_in'), @@ -717,16 +809,41 @@ def initialize(self): dims = inp.dim_names self.add_output_variable(shape, dims) - gamma = self.model.get_weights_data(self.name, 'gamma') 
- beta = self.model.get_weights_data(self.name, 'beta') - mean = self.model.get_weights_data(self.name, 'moving_mean') - var = self.model.get_weights_data(self.name, 'moving_variance') + if not self.get_attr('scale'): + gamma = self.model.get_weights_data(self.name, 'gamma') + beta = self.model.get_weights_data(self.name, 'beta') + mean = self.model.get_weights_data(self.name, 'moving_mean') + var = self.model.get_weights_data(self.name, 'moving_variance') + + scale = gamma / np.sqrt(var + self.get_attr('epsilon')) + bias = beta - gamma * mean / np.sqrt(var + self.get_attr('epsilon')) + + self.add_weights_variable(name='scale', var_name='s{index}', data=scale) + self.add_weights_variable(name='bias', var_name='b{index}', data=bias) + +class ApplyAlpha(BatchNormalization): + ''' A custom layer to scale the output of a QDense layer which used 'alpha != 1' + Inference computation uses BatchNormalization methods''' + + def initialize(self): + inp = self.get_input_variable() + shape = inp.shape + dims = inp.dim_names + self.add_output_variable(shape, dims) + + scale = self.get_attr('scale_data') + scale_quantizer = self.get_attr('scale_quantizer') + bias = self.get_attr('bias_data') + bias_quantizer = self.get_attr('bias_quantizer') - scale = gamma / np.sqrt(var + self.get_attr('epsilon')) - bias = beta - gamma * mean / np.sqrt(var + self.get_attr('epsilon')) + self.add_weights(scale, quantizer=scale_quantizer) + self.add_bias(bias, quantizer=bias_quantizer) - self.add_weights_variable(name='scale', var_name='s{index}', data=scale) - self.add_weights_variable(name='bias', var_name='b{index}', data=bias) + def add_weights(self, scale, quantizer=None): + self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer) + + def add_bias(self, bias, quantizer=None): + self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer) class Merge(Layer): def initialize(self): @@ -741,6 +858,29 @@ def initialize(self): dims = inp1.dim_names self.add_output_variable(shape, dims) +class MatMul(Layer): + """ + This is a matrix multiply. Currently, it is only supported as an intermediate + form that gets converted to a Dense layer. 
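Roughly, the MatmulConstToDense pass later in this patch keeps the output shape of the multiply and sizes its accumulator from the operand precisions. A small sketch of both rules, assuming plain numpy and mirroring the arithmetic in that pass (the helper names are illustrative only):

    import numpy as np

    def matmul_output_shape(shape_a, shape_b):
        # contract the last dimension of A with the second-to-last dimension of B
        assert shape_a[-1] == shape_b[-2]
        return list(shape_a[:-1]) + [shape_b[-1]]

    def accumulator_bits(weight_bits, input_bits, n_acc):
        # width assigned by the pass: product width plus headroom for n_acc additions
        return weight_bits + input_bits + int(np.ceil(np.log2(n_acc)))

    assert matmul_output_shape([16, 8], [8, 4]) == [16, 4]
    assert accumulator_bits(8, 8, 64) == 22   # 8 + 8 + log2(64)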
+ """ + def initialize(self): + assert(len(self.inputs) == 2) + inp1 = self.get_input_variable(self.inputs[0]) + inp2 = self.get_input_variable(self.inputs[1]) + if len(inp2.shape) == 1: + # mat vec multiply + assert(inp1.shape[-1] == inp2.shape[0]) + shape = inp1.shape[:-1] + [inp2.shape[0]] + else: + assert(inp1.shape[-1] == inp2.shape[-2]) + shape = inp1.shape[:-1] + [inp2.shape[-1]] + if len(shape) > 1: + dims = ['N_LAYER_{}_{}'.format(i, self.index) for i in range(1, len(shape) + 1)] + else: + dims = ['N_LAYER_{}'.format(self.index)] + + self.add_output_variable(shape, dims) + class Dot(Merge): def initialize(self): assert(len(self.inputs) == 2) @@ -791,9 +931,9 @@ def initialize(self): if len(perm) > 3: raise Exception('ERROR: Transpose of tensors with rank > 3 is not yet supported.') - + #ONNX double transpose specific, sometimes ONNX injects - #useless double transpose layers when converting + #useless double transpose layers when converting #from other frameworks if len(perm) == 1: shape = inp.shape #dummy shape @@ -803,7 +943,7 @@ def initialize(self): shape = [inp.shape[i] for i in perm] self.set_attr('perm_str', ','.join([str(i) for i in perm])) - + if len(shape) == 2: self.set_attr('perm_str', ','.join(['0'] + [str(i+1) for i in perm])) dims = ['OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index)] @@ -909,7 +1049,7 @@ def _add_variable(self, name, var_name, data, frac_width=10, quantize=False): # automatically make the variable unsigned if data are all positive signed = (np.amin(data) < 0.) - + int_width = find_minimum_width(data, signed=signed) if quantize: @@ -917,9 +1057,9 @@ def _add_variable(self, name, var_name, data, frac_width=10, quantize=False): else: width = int_width + frac_width precision = FixedPrecisionType(width=width, integer=int_width, signed=signed, rounding_mode='AP_RND', saturation_mode='AP_SAT') - + self.add_weights_variable(name=name, var_name=var_name, data=data, precision=precision) - + class GarNetStack(GarNet): def _initialize_transforms(self): self._sublayer_weights = [] @@ -942,13 +1082,13 @@ def _initialize_transforms(self): name = 'input_transform_{}_biases'.format(il) self._add_variable(name, 'input_transform_{}_b{{index}}'.format(il), bias, frac_width=10, quantize=quantize) sublayer_weights['input_transform_biases'] = self.weights[name] - + weights_source = [ ('aggregator_distance', 'S{}'.format(il), 'kernel'), ('aggregator_distance', 'S{}'.format(il), 'bias'), ('output_transform', 'Fout{}'.format(il), 'bias') ] - + for op_name, lname, wtype in weights_source: data = self.model.get_weights_data(self.name, '{name}/{lname}_{wtype}:0'.format(name=self.name, lname=lname, wtype=wtype)) if wtype == 'kernel': @@ -970,6 +1110,7 @@ def _initialize_transforms(self): layer_map = { 'Input' : Input, 'InputLayer' : Input, + 'Constant' : Constant, 'Activation' : Activation, 'QActivation' : Activation, 'LeakyReLU' : ParametrizedActivation, @@ -983,6 +1124,7 @@ def _initialize_transforms(self): 'BinaryDense' : Dense, 'TernaryDense' : Dense, 'QDense' : Dense, + 'Conv' : Conv, 'Conv1D' : Conv1D, 'QConv1D' : Conv1D, 'Conv2D' : Conv2D, @@ -1005,6 +1147,7 @@ def _initialize_transforms(self): 'ZeroPadding1D' : ZeroPadding1D, 'ZeroPadding2D' : ZeroPadding2D, 'Merge' : Merge, + 'MatMul' : MatMul, 'Dot' : Dot, 'Concatenate' : Concatenate, 'Resize' : Resize, @@ -1012,6 +1155,9 @@ def _initialize_transforms(self): 'Transpose' : Transpose, 'GarNet' : GarNet, 'GarNetStack' : GarNetStack, + 'Quant' : Quant, + 'ApplyAlpha' : ApplyAlpha, + 'BatchNormOnnx' : 
BatchNormOnnx, # TensorFlow-specific layers: 'BiasAdd' : BiasAdd, } diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index ba614619f9..94865e790a 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -1,3 +1,4 @@ +from email.mime import base from hls4ml.model.flow.flow import register_flow import os @@ -11,12 +12,36 @@ for opt_name, opt in optimizers.items(): register_pass(opt_name, opt) + +base_convert = [ + 'fuse_bias_add', + 'remove_useless_transpose', + 'reshape_constant', + 'quant_constant_parameters', + 'quant_to_activation', + 'fuse_quant_with_constant', + 'const_quant_to_const_alpha', + 'batch_norm_onnx_constant_parameters', + 'constant_batch_norm_fusion', + 'merge_two_constants', + 'scale_down_add', + 'scale_down_mat_mul', + 'scale_down_weight_conv', + 'scale_down_bias_conv', + 'scale_down_conv', + 'merge_to_batch_normalization', + 'merge_to_batch_normalization_div', + + ] + try: import qkeras - register_flow('convert', ['fuse_bias_add', 'remove_useless_transpose', 'output_rounding_saturation_mode', 'qkeras_factorize_alpha', 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization']) # TODO Maybe not all QKeras optmizers belong here? + # TODO Maybe not all QKeras optmizers belong here? + register_flow('convert', base_convert + + ['output_rounding_saturation_mode', 'qkeras_factorize_alpha', 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization']) register_flow('optimize', ['eliminate_linear_activation', 'fuse_consecutive_batch_normalization', 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv'], requires=['convert']) except: - register_flow('convert', ['fuse_bias_add', 'remove_useless_transpose']) + register_flow('convert', base_convert) register_flow('optimize', ['eliminate_linear_activation', 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv'], requires=['convert']) del opt_path diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py new file mode 100644 index 0000000000..d1c7154c59 --- /dev/null +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -0,0 +1,163 @@ +import numpy as np +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant + +class BatchNormOnnxConstantParameters(OptimizerPass): + """ Remove Constant from the BatchNormalization node parameters (but not input[0]) """ + def match(self, node): + is_match = (isinstance(node, BatchNormOnnx) + and any(node.inputs[1:])) + + return is_match + + def transform(self, model, node): + """ + Remove Constant from the BatchNormalization node parameters (but not input[0]) + """ + + if not (len(node.inputs) == 5 and all(node.inputs)): + raise ValueError(f"All {len.node.inputs} BatchNormOnnnx inputs need to be defined") + + gamma_node = node.get_input_node(node.inputs[1]) + if not isinstance(gamma_node, Constant): + raise TypeError("Only consant gammas supported") + gamma = gamma_node.value + node.set_attr('gamma', gamma) + node.inputs[1] = '' + model.remove_node(gamma_node, rewire=False) + + beta_node = node.get_input_node(node.inputs[2]) + if not isinstance(beta_node, Constant): + raise TypeError("Only consant betas supported") + beta = beta_node.value + node.set_attr('beta', beta) + node.inputs[2] = '' + model.remove_node(beta_node, rewire=False) + + moving_mean_node = node.get_input_node(node.inputs[3]) + if not isinstance(moving_mean_node, Constant): + raise 
TypeError("Only consant moving_means supported") + moving_mean = moving_mean_node.value + node.set_attr('moving_mean', moving_mean) + node.inputs[3] = '' + model.remove_node(moving_mean_node, rewire=False) + + moving_variance_node = node.get_input_node(node.inputs[4]) + if not isinstance(moving_variance_node, Constant): + raise TypeError("Only consant moving_variances supported") + moving_variance = moving_variance_node.value + node.set_attr('moving_variance', moving_variance) + node.inputs[4] = '' + model.remove_node(moving_variance_node, rewire=False) + + scale = gamma / np.sqrt(moving_variance + node.get_attr('epsilon')) + bias = beta - gamma * moving_mean / np.sqrt(moving_variance + node.get_attr('epsilon')) + node.add_weights_variable("scale", data=scale, precision=node.get_attr("scale_precision"), quantizer=node.get_attr("bias_quantizer")) + node.add_weights_variable("bias", data=bias, precision=node.get_attr("bias_precision"), quantizer=node.get_attr("bias_quantizer")) + + new_node = model.make_node(BatchNormalization, node.name, node.attributes, + [node.inputs[0]], [x for x in node.outputs]) + + model.replace_node(node, new_node) + + return True + + +class ConstantBatchNormFusion(OptimizerPass): + """ + Merge BatchNorm into Const (after parameters have already been merged in BatchNormalization) + """ + def match(self, node): + is_match = (isinstance(node, BatchNormalization) + and not any(node.inputs[1:]) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[0]).get_attr("quant_precision")) + return is_match + + def transform(self, model, node): + """ + Remove the batch norm + """ + const_node = node.get_input_node(node.inputs[0]) + + new_val = const_node.value * node.weights["scale"].data_unquantized + node.weights["bias"].data_unquantized + const_node.set_attr("value", new_val) + const_node.set_attr("quantizer", node.get_attr("quantizer")) # None if not defined + const_node.set_attr("quant_precision", node.get_attr("quant_precision")) + + # reinitialize (which also runs quantization if quantizer exists) + const_node.initialize() + + # remove the batch norm node + model.remove_node(node, rewire=True) + + return True + + +class FuseConsecutiveBatchNormalization(OptimizerPass): + ''' + OptimizerPass to merge consecutive BatchNormalization layers, + only if the earlier one does not have quantization specified + ''' + + def match(self, node): + return (isinstance(node, BatchNormalization) + and isinstance(node.get_input_node(node.inputs[0]), BatchNormalization) + and not node.get_input_node(node.inputs[0]).get_attr("quant_precision")) + + + def transform(self, model, node): + prev_node = node.get_input_node(node.inputs[0]) + + s0 = prev_node.weights['scale'].data_unquantized + b0 = prev_node.weights['bias'].data_unquantized + s1 = node.weights['scale'].data_unquantized + b1 = node.weights['bias'].data_unquantized + + scale_new = s0 * s1 + bias_new = s1 * b0 + b1 + + # call function so that quantizer would be called if needed + node.add_weights_variable(name='scale', data=scale_new, precision=node.get_attr("scale_precision"), quantizer=node.get_attr("scale_quantizer")) + node.add_weights_variable(name='bias', data=bias_new, precision=node.get_attr("bias_precision"), quantizer=node.get_attr("bias_quantizer")) + + model.remove_node(prev_node, rewire=True) + return True + + +class BroadcastWeightsBatchNormalization(OptimizerPass): + ''' + The scale and bias need to be broadcast to appropriate size before systhesis + ''' + + def 
match(self, node): + return isinstance(node, BatchNormalization) + + + def transform(self, model, node): + + input_shape = node.get_input_variable().shape + + scale = node.weights['scale'].data_unquantized + bias = node.weights['bias'].data_unquantized + + n_filt = node.get_attr('n_filt', -1) + + n_scale_bias = np.prod(input_shape) if n_filt == -1 else n_filt + + scale_bias_shape = input_shape if n_filt == -1 else (n_filt,) + node.set_attr("n_scale_bias", n_scale_bias) + + # Check shape, broadcast if needed. + if scale.shape != scale_bias_shape: + node.add_weights_variable(name='scale', data=np.broadcast_to(scale, scale_bias_shape), + precision=node.get_attr("scale_precision"), + quantizer=node.get_attr("scale_quantizer")) + + if bias.shape != scale_bias_shape: + node.add_weights_variable(name='bias', data=np.broadcast_to(bias, scale_bias_shape), + precision=node.get_attr("bias_precision"), + quantizer=node.get_attr("bias_quantizer")) + + # I think there's no need to restart; also, it prevents an infinite loop + return False diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py new file mode 100644 index 0000000000..2229c83e02 --- /dev/null +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -0,0 +1,83 @@ +import numpy as np +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import FixedPrecisionType +from hls4ml.model.layers import Conv, Constant + +class ConvToConvXD(OptimizerPass): + """ Convert Conv with constant to a Conv1D or Conv2D layer """ + def match(self, node): + is_match = (isinstance(node, Conv) + and ((len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or (len(node.inputs) == 3 + and isinstance(node.get_input_node(node.inputs[1]), Constant) + and isinstance(node.get_input_node(node.inputs[2]), Constant)))) + + return is_match + + def transform(self, model, node): + """ Convert Conv with constant to a Conv1D or Conv2D layer """ + + input_node = node.get_input_node(node.inputs[0]) + input_precision = input_node.get_attr("quant_precision") + weight_node = node.get_input_node(node.inputs[1]) + weight_precision = weight_node.get_attr("quant_precision") + bias_node = None + bias_precision = None + if len(node.inputs) == 3: + bias_node = node.get_input_node(node.inputs[2]) + bias_precision = bias_node.get_attr("quant_precision") + + # copy the attributes to the new node. 
(No need to explictily copy since the old node is deleted) + attributes = node.attributes + + quant_precision = None + + if weight_precision and input_precision and (bias_precision or not bias_node): + if (weight_precision.width != weight_precision.integer + or input_precision.width != input_precision.integer): + raise ValueError("quant_precisions must always have the same width and integer parameters") + + num_feature_maps = weight_node.value.shape[0] + Nacc = attributes['filt_width'] * attributes.get('filt_height', 1) * num_feature_maps + bitwidth = weight_precision.width + input_precision.width + int(np.ceil(np.log2(Nacc))) + signed = weight_precision.signed or input_precision.signed + # copy staruation and rounding from "other" + rounding_mode = input_precision.rounding_mode + saturation_mode = input_precision.saturation_mode + + # correct if bias + if bias_node: + bitwidth = max(bitwidth + (bias_precision.signed and not signed), + bias_precision.width + (signed and not bias_precision.signed)) + 1 + signed = signed or bias_precision.signed + quant_precision = FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) + + #creating the attributes + + # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) + if attributes['n_dim'] == 1: + nodetype = "Conv1D" + attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 0)) + else: + nodetype = "Conv2D" + attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 3, 0)) + attributes["weight_precision"] = weight_precision + attributes["weight_quantizer"] = weight_node.get_attr("quantizer") + attributes["quant_precision"] = quant_precision + + if bias_node: + attributes["bias_data"] = bias_node.value + attributes["bias_precision"] = bias_precision, + attributes["bias_quantizer"] = bias_node.get_attr("quantizer") + + #making new node + new_node = model.make_node(nodetype, f"{nodetype}_{node.name}", attributes, + [node.inputs[0]], [x for x in node.outputs]) + + #removing and replacing old nodes + model.remove_node(weight_node, rewire=False) + if bias_node: + model.remove_node(bias_node, rewire=False) + model.replace_node(node, new_node) + + return True diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py new file mode 100644 index 0000000000..7798ee7471 --- /dev/null +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -0,0 +1,67 @@ +import numpy as np +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import FixedPrecisionType +from hls4ml.model.layers import MatMul, Constant, Dense + +class MatmulConstToDense(OptimizerPass): + """ Convert MatMul with constant to a dense layer """ + def match(self, node): + is_match = (isinstance(node, MatMul) and len(node.inputs) == 2 + and (isinstance(node.get_input_node(node.inputs[0]), Constant) + or isinstance(node.get_input_node(node.inputs[1]), Constant))) + return is_match + + def transform(self, model, node): + """ Substitute Matmul + Constant for a single dense """ + #determining Constant layer input + matmul_node = node + const_node = None + const_inp_idx = 0 + other_inp_idx = 1 + if isinstance(matmul_node.get_input_node(matmul_node.inputs[0]), Constant): + const_node = matmul_node.get_input_node(matmul_node.inputs[0]) + other_node = matmul_node.get_input_node(matmul_node.inputs[1]) + else: + const_node = matmul_node.get_input_node(matmul_node.inputs[1]) + other_node = matmul_node.get_input_node(matmul_node.inputs[0]) + 
const_inp_idx = 1 + other_inp_idx = 0 + + quant_precision = None + weight_precision = const_node.get_attr("quant_precision") + other_precision = other_node.get_attr("quant_precision") + + if weight_precision and other_precision: + if (weight_precision.width != weight_precision.integer + or other_precision.width != other_precision.integer): + raise ValueError("quant_precisions must always have the same width and integer parameters") + + Nacc = matmul_node.get_input_variable(matmul_node.inputs[0]).shape[-1] + bitwidth = weight_precision.width + other_precision.width + int(np.ceil(np.log2(Nacc))) + signed = weight_precision.signed or other_precision.signed + # copy staruation and rounding from "other" + rounding_mode = other_precision.rounding_mode + saturation_mode = other_precision.saturation_mode + quant_precision = FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) + + #creating the attributes + attributes = matmul_node.attributes + attributes.update({ + "weight_data": const_node.value, + "weight_precision": weight_precision, + "weight_quantizer": const_node.get_attr("quantizer"), + "quant_precision": quant_precision, + "omit_bias": True, + "n_in": const_node.value.shape[0], + "n_out": const_node.value.shape[1] + }) + + #making new node + new_dense = model.make_node(Dense, f"Dense_{matmul_node.name}", attributes, + [matmul_node.inputs[other_inp_idx]], [x for x in matmul_node.outputs]) + + #removing and replacing old nodes + model.remove_node(const_node, rewire=False) + model.replace_node(matmul_node, new_dense) + + return True diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py new file mode 100644 index 0000000000..aa0c6ac376 --- /dev/null +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -0,0 +1,157 @@ +import numpy as np +from hls4ml.model.layers import Merge, Constant +from hls4ml.model.optimizer import OptimizerPass + + +class MergeTwoConstants(OptimizerPass): + """ Merge of two constants makes another constant """ + def match(self, node): + is_match = (isinstance(node, Merge) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and isinstance(node.get_input_node(node.inputs[1]), Constant)) + + return is_match + + def transform(self, model, node): + """ + Merge of two constants makes another constant + """ + const_node0 = node.get_input_node(node.inputs[0]) + const_node1 = node.get_input_node(node.inputs[1]) + + val0 = const_node0.value + val1 = const_node1.value + + op = node.attributes["op"] + if op in ('add', 'sum'): + new_val = val0 + val1 + elif op == 'sub': + new_val = val0 - val1 + elif op == 'mul': + new_val = val0 * val1 + elif op == 'div': + new_val = val0 / val1 + elif op == 'average': + new_val = np.mean( np.array([val0, val1]), axis=0 ) + elif op == 'max': + new_val = np.maximum(val0, val1) + elif op == 'min': + new_val = np.minimum(val0, val1) + else: + raise RuntimeError(f"Unexpected op_type: {op}") + + quantizer = node.get_attr("quantizer") # None if not defined + if quantizer: + const_node0.set_attr("quantizer", quantizer) + const_node0.set_attr("value", new_val) + + quant_precision = node.get_attr("quant_precision") + if quant_precision: + const_node0.set_attr("quant_precision", quant_precision) + + # reinitialize (which also runs quantization if quantizer exists) + const_node0.initialize() + + model.remove_node(const_node1, rewire=False) + + # remove the batch norm node + model.remove_node(node, rewire=True) + + return True + +class 
MergeToBatchNormalization(OptimizerPass): + """ Convert Add, Sub, Mul, or Div Merges with consant to BatchNormalization """ + def match(self, node): + is_match = (isinstance(node, Merge) + and node.attributes["op"] in ("add", "sum", "sub", "mul") # Div is separate + and (isinstance(node.get_input_node(node.inputs[0]), Constant) + != isinstance(node.get_input_node(node.inputs[1]), Constant))) + # note: != for booleans is xor. + return is_match + + def transform(self, model, node): + + node1 = node.get_input_node(node.inputs[1]) + + node1const = isinstance(node1, Constant) + if node1const: + const_node = node1 + input_node_idx = 0 + else: + const_node = node.get_input_node(node.inputs[0]) + input_node_idx = 1 + + input_shape = node.get_input_variable(node.inputs[input_node_idx]).shape + n_in = np.prod(input_shape) + + + op = node.attributes["op"] + if op in ('add', 'sum'): + scale = np.array(1) + bias = const_node.value + elif op == 'sub': + if node1const: + scale = np.array(1) + bias = -const_node.value + else: + scale = np.array(-1) + bias = const_node.value + + elif op == 'mul': + scale = const_node.value + bias = np.array(0) + + attributes = node.attributes + attributes.update({ + "simple": True, + "scale": scale, + "bias": bias, + "n_in": n_in, + "n_out": n_in, + "n_filt": -1 + }) + + bn_layer = model.make_node("BatchNormalization", f"bn_{node.name}", + attributes, + [node.inputs[input_node_idx]], [x for x in node.outputs]) + + model.remove_node(const_node, rewire=False) + model.replace_node(node, bn_layer) + + return True + +class MergeToBatchNormalizationDiv(OptimizerPass): + """ Convert Add, Sub, Mul, or Div Merges with consant to BatchNormalization """ + def match(self, node): + is_match = (isinstance(node, Merge) + and node.attributes["op"] == 'div' + and isinstance(node.get_input_node(node.inputs[1]), Constant)) # only second can be const + + return is_match + + def transform(self, model, node): + input_shape = node.get_input_variable().shape + n_in = np.prod(input_shape) + const_node = node.get_input_node(node.inputs[1]) + scale = 1/const_node.value + bias = np.array(0) + + attributes = { + "simple": True, + "scale": scale, + "bias": bias, + "quant_precision": node.get_attr("quant_precision"), + "quantizer": node.get_attr("quantizer"), + "n_in": n_in, + "n_out": n_in, + "n_filt": -1 + } + + bn_layer = model.make_node("BatchNormalization", f"bn_{node.name}", + attributes, + [node.inputs[0]], [x for x in node.outputs]) + + model.remove_node(const_node, rewire=False) + model.replace_node(node, bn_layer) + + return True diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py new file mode 100644 index 0000000000..7823ca97ba --- /dev/null +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -0,0 +1,291 @@ +''' +This file includes optimizations related to moving the ApplyAphas across MatMul and Conv nodes. + +TODO: Check that biases are properly handled. (Attempt to do it via Merge) + +''' +from copy import deepcopy +import numpy as np +from hls4ml.model.layers import ApplyAlpha, Constant, MatMul, Conv, Merge +from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer +from hls4ml.model.optimizer import OptimizerPass + +class ScaleDownMatMul(OptimizerPass): + '''Shift an ApplyAlpha below a MatMul''' + + def match(self, node): + '''Check to see if we have a MatMul with at least one input ApplyAlpha. 
Note, if both are this optimition runs twice''' + is_match = (isinstance(node, MatMul) and len(node.inputs) == 2 + and (isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) + or isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha))) + return is_match + + def transform(self, model, node): + # determine input with ApplyAlpha. If both, first propagate apply alpha associated with a constant + is_aa = [False, False] + from_const = [False, False] + inp = [node.get_input_node(node.inputs[0]), node.get_input_node(node.inputs[1])] + for i in range(2): + if isinstance(inp[i], ApplyAlpha): + is_aa[i] = True + from_const[i] = isinstance(inp[i].get_input_node(inp[i].inputs[0]), Constant) + + # prefer alpha from constant + if from_const[0]: + alpha_idx = 0 + elif from_const[1]: + alpha_idx = 1 + elif is_aa[0]: + alpha_idx = 0 + else: + alpha_idx = 1 # is_aa[1] must be true + + apply_alpha = inp[alpha_idx] + other_idx = 0 if alpha_idx else 1 + + # Check if we can move + scale = apply_alpha.weights['scale'].data_unquantized + bias = apply_alpha.weights['bias'].data_unquantized + + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + output = node.get_output_variable() + + can_propagate = False + if not bias.shape and bias == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + try: + np.broadcast_to(scale, output.shape) # check size compatibility + newscale = scale + newbias = np.array(0) + can_propagate = True + except ValueError: + can_propagate = False + + # if did not succeed in propagating, try again + if not can_propagate and isinstance(inp[other_idx], Constant): + # can handle nonzero bias in some cases if other value is a Constant + try: + np.broadcast_to(scale, output.shape) # check size compatibility + newscale = scale + newbias = inp[other_idx].value * bias + np.broadcast_to(newbias, output.shape) + can_propagate = True + except ValueError: + can_propagate = False + + if not can_propagate: + return False + + model.remove_node(apply_alpha) + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) + new_node.add_weights(newscale) + new_node.add_bias(newbias) + model.insert_node(new_node) + return True + +class ScaleDownAdd(OptimizerPass): + '''Shift an identical ApplyAlpha below a Merge (Add)''' + + def match(self, node): + '''Check to see if we have an add with two ApplyAlphas with identical scale''' + is_match = (isinstance(node, Merge) and len(node.inputs) == 2 + and node.attributes["op"] == "add") + if is_match: + in0 = node.get_input_node(node.inputs[0]) + in1 = node.get_input_node(node.inputs[1]) + is_match = (isinstance(in0, ApplyAlpha) and isinstance(in1, ApplyAlpha) + and (in0.weights['scale'].data_unquantized == in1.weights['scale'].data_unquantized).all()) + return is_match + + def transform(self, model, node): + + in0 = node.get_input_node(node.inputs[0]) + in1 = node.get_input_node(node.inputs[1]) + + # Check if we can move + scale = in0.weights['scale'].data_unquantized + bias0 = in0.weights['bias'].data_unquantized + bias1 = in1.weights['bias'].data_unquantized + try: + bias = bias0 + bias1 + except ValueError: + return False + + model.remove_node(in0) + model.remove_node(in1) + + new_node = model.make_node('ApplyAlpha', in0.name, in0.attributes, [x for x in node.outputs]) + new_node.add_weights(scale) + 
new_node.add_bias(bias) + model.insert_node(new_node) + return True + + +class ScaleDownConv(OptimizerPass): + '''Shift an ApplyAlpha on input below a Conv''' + + def match(self, node): + '''Shift an ApplyAlpha from the Weight''' + is_match = (isinstance(node, Conv) + and isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha)) + + return is_match + + def transform(self, model, node): + + apply_alpha = node.get_input_node(node.inputs[0]) + + # Check if we can move + scale = apply_alpha.weights['scale'].data_unquantized + bias = apply_alpha.weights['bias'].data_unquantized + + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + output = node.get_output_variable() + + can_propagate = False + if not bias.shape and bias == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + try: + np.broadcast_to(scale, output.shape) # check broadcastable + newscale = scale + newbias = np.array(0) + can_propagate = True + except ValueError: + can_propagate = False + + if not can_propagate: + return False + + model.remove_node(apply_alpha) + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) + new_node.add_weights(newscale) + new_node.add_bias(newbias) + model.insert_node(new_node) + return True + +class ScaleDownWeightConv(OptimizerPass): + '''Shift an ApplyAlpha weight (from conv side) below a Conv''' + + def match(self, node): + '''Shift an ApplyAlpha from the Weight''' + is_match = (isinstance(node, Conv) and len(node.inputs) > 1 + and isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha)) + + return is_match + + def transform(self, model, node): + + apply_alpha = node.get_input_node(node.inputs[1]) + + # Check if we can move + scale = apply_alpha.weights['scale'].data_unquantized + bias = apply_alpha.weights['bias'].data_unquantized + + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + output = node.get_output_variable() + + can_propagate = False + if not bias.shape and bias == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + try: + np.broadcast_to(scale, output.shape) # make sure broadcastable + newscale = scale + newbias = np.array(0) + can_propagate = True + except ValueError: + can_propagate = False + + if not can_propagate: + return False + + model.remove_node(apply_alpha) + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) + new_node.add_weights(newscale) + new_node.add_bias(newbias) + model.insert_node(new_node) + return True + +class ScaleDownBiasConv(OptimizerPass): + '''Shift an ApplyAlpha bias (from conv side) below a Conv''' + + def match(self, node): + '''Shift an ApplyAlpha from the Weight''' + is_match = (isinstance(node, Conv) and len(node.inputs) > 2 + and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha)) + + return is_match + + def transform(self, model, node): + + apply_alpha = node.get_input_node(node.inputs[2]) + + # Check if we can move + scale = apply_alpha.weights['scale'].data_unquantized + bias = apply_alpha.weights['bias'].data_unquantized + + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = 
np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + output = node.get_output_variable() + + can_propagate = False + if not scale.shape and scale == 1: + # No scale, just additional bias + try: + np.broadcast_to(bias, output.shape) + newscale = np.array(1) + newbias = bias + can_propagate = True + except ValueError: + can_propagate = False + + if not can_propagate: + return False + + model.remove_node(apply_alpha) + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) + new_node.add_weights(newscale) + new_node.add_bias(newbias) + model.insert_node(new_node) + return True diff --git a/hls4ml/model/optimizer/passes/nop.py b/hls4ml/model/optimizer/passes/nop.py index daf3e71fc4..05104b1e2a 100644 --- a/hls4ml/model/optimizer/passes/nop.py +++ b/hls4ml/model/optimizer/passes/nop.py @@ -1,13 +1,35 @@ from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.layers import Activation class EliminateLinearActivation(OptimizerPass): def match(self, node): cast = False - if isinstance(node, Activation): + if node.__class__.__name__ == 'Activation': cast = node.get_input_variable().type.precision != node.get_output_variable().type.precision - return isinstance(node, Activation) and node.get_attr('activation') == 'linear' and not cast + return node.__class__.__name__ == 'Activation' and node.get_attr('activation') == 'linear' and not cast def transform(self, model, node): model.remove_node(node) return True + +class EliminateLinearActivationQuant(OptimizerPass): + ''' + This is to optimize away lots of linear qantizations in QONNX. May have to restrict it + more if it causes problems. + ''' + def match(self, node): + ''' + Only match if this activation is from quant node and previous node precision is not set by a quant node already. 
+ ''' + is_match = (node.__class__.__name__ == 'Activation' and node.get_attr('activation') == 'linear' + and node.get_attr("quant_precision") + and not node.get_input_node(node.inputs[0]).get_attr("quant_precision")) + return is_match + + def transform(self, model, node): + prev_node = node.get_input_node(node.inputs[0]); + quant_precision = node.get_attr("quant_precision") + prev_node.set_attr("quant_precision", quant_precision) + prev_node.set_attr("quantizer", node.get_attr("quantizer")) + prev_node.update_output_precision(quant_precision) + model.remove_node(node) + return True diff --git a/hls4ml/model/optimizer/passes/qkeras.py b/hls4ml/model/optimizer/passes/qkeras.py index b479287141..158baef494 100644 --- a/hls4ml/model/optimizer/passes/qkeras.py +++ b/hls4ml/model/optimizer/passes/qkeras.py @@ -1,5 +1,5 @@ from hls4ml.model.optimizer import OptimizerPass, ConfigurableOptimizerPass, register_pass -from hls4ml.model.layers import BatchNormalization, Dense, Conv1D, Conv2D, register_layer, layer_map +from hls4ml.model.layers import ApplyAlpha, register_layer from hls4ml.model.types import IntegerPrecisionType, FixedPrecisionType, ExponentPrecisionType, NamedType import tensorflow as tf import numpy as np @@ -78,39 +78,12 @@ def precision_string_modify(self, pstr): pstr = pstr.replace('>', mode) return pstr -class ApplyAlpha(BatchNormalization): - ''' A custom layer to scale the output of a QDense layer which used 'alpha != 1' - Inference computation uses BatchNormalization methods''' - - def initialize(self): - inp = self.get_input_variable() - shape = inp.shape - dims = inp.dim_names - self.add_output_variable(shape, dims) - - scale = self.get_attr('scale_data') - scale_quantizer = self.get_attr('scale_quantizer') - bias = self.get_attr('bias_data') - bias_quantizer = self.get_attr('bias_quantizer') - - self.add_weights(scale, quantizer=scale_quantizer) - self.add_bias(bias, quantizer=bias_quantizer) - - def add_weights(self, scale, quantizer=None): - self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer) - - def add_bias(self, bias, quantizer=None): - self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer) - def register_qkeras(): - # Register the layer types to the layer map - register_layer('ApplyAlpha', ApplyAlpha) - # Register the optimization passes register_pass('output_rounding_saturation_mode', OutputRoundingSaturationMode) register_pass('qkeras_factorize_alpha', QKerasFactorizeAlpha) register_pass('extract_ternary_threshold', ExtractTernaryThreshold) - register_pass('fuse_consecutive_batch_normalization', FuseConsecutiveBatchNormalization) + # register_pass('fuse_consecutive_batch_normalization', FuseConsecutiveBatchNormalization) class QKerasFactorizeAlpha(OptimizerPass): '''OptimizerPass for extracting alpha "scale" from QKeras quantized layer. @@ -201,33 +174,36 @@ def transform(self, model, node): model.insert_node(alpha_layer) return True -class FuseConsecutiveBatchNormalization(OptimizerPass): - '''OptimizerPass to merge consecutive BatchNormalization layers. - These may exist in a model after QKerasFactorizeAlpha layer. - Scale and Bias of each layer are combined into scale and bias of a single layer. - ''' +# # This has been replaced with a more generic one, but I am not sure if it +# # does the appropriate quantizations for keras. If needed, we can modfiy the other one. 
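The generic replacement (FuseConsecutiveBatchNormalization in batchnorm_opt.py above) relies on the fact that two affine transforms compose into a single one; a quick self-contained check of that identity, assuming numpy and scalar scale/bias values for brevity (not part of the patch itself):

    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.normal(size=8)
    s0, b0 = 1.5, 0.25    # first BatchNormalization expressed as scale/bias
    s1, b1 = 0.5, -1.0    # second BatchNormalization expressed as scale/bias

    fused_scale = s0 * s1
    fused_bias = s1 * b0 + b1
    assert np.allclose(s1 * (s0 * x + b0) + b1, fused_scale * x + fused_bias)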
- def match(self, node): - return isinstance(node, BatchNormalization) and \ - isinstance(node.get_input_node(), BatchNormalization) +# class FuseConsecutiveBatchNormalization(OptimizerPass): +# '''OptimizerPass to merge consecutive BatchNormalization layers. +# These may exist in a model after QKerasFactorizeAlpha layer. +# Scale and Bias of each layer are combined into scale and bias of a single layer. +# ''' - def transform(self, model, node): - bn0 = node.get_input_node() - bn1 = node +# def match(self, node): +# return isinstance(node, BatchNormalization) and \ +# isinstance(node.get_input_node(), BatchNormalization) - s0 = bn0.weights['scale'].data - b0 = bn0.weights['bias'].data - s1 = bn1.weights['scale'].data - b1 = bn1.weights['bias'].data +# def transform(self, model, node): +# bn0 = node.get_input_node() +# bn1 = node - s2 = s0 * s1 - b2 = s1 * b0 + b1 +# s0 = bn0.weights['scale'].data +# b0 = bn0.weights['bias'].data +# s1 = bn1.weights['scale'].data +# b1 = bn1.weights['bias'].data - bn0.weights['scale'].data = s2 - bn0.weights['bias'].data = b2 +# s2 = s0 * s1 +# b2 = s1 * b0 + b1 - model.remove_node(node, rewire=True) - return True +# bn0.weights['scale'].data = s2 +# bn0.weights['bias'].data = b2 + +# model.remove_node(node, rewire=True) +# return True class ExtractTernaryThreshold(OptimizerPass): ''' The input value (threshold) at which the output of a a ternary activation diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py new file mode 100644 index 0000000000..465e073a5c --- /dev/null +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -0,0 +1,351 @@ +''' +This file includes optimizations related to quant nodes. + +As a first step, QuantConstantParameters converts the extra inputs to attributes. It is always the first step + +The next step differs between the case of (1) unitary scale and zero offset, or (2) nonunitary scale and/or nonzero offset. +In the first case no scaling is required, so a Quant node effectively becomes a linear activation. For the common case when this +is applied on a constant weight, the activation is immediately merged with the weight, qantizing the weights. In case 2, +we need to explictly scale and unscale, so the Quant node becomes 3 nodes, an ApplyAlpha node to apply a scale/shift, a +Linear node to apply the quantization, and another ApplyAlpha to unscale/shift. We depend on optimization steps to move the +unscaling ApplyAlpha down as needed. Again, when the Quant is a applied ot a Constant, the scaling and Linear nodes are +immediately merged into the Constant. This is done because it simplifies some of the other optimizations. 
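A small numeric sketch of case (2), using illustrative values that are not part of the
code, may help fix the conventions used by the passes below:

    import numpy as np

    x = np.array([0.37, -1.42, 0.05])
    scale, zeropt, bitwidth = 0.25, 2.0, 8

    # first ApplyAlpha: move the input into the integer (quantized) domain
    y = x / scale + zeropt
    # linear Activation with the quantizer: round and saturate to 8 signed bits
    q = np.clip(np.round(y), -(2 ** (bitwidth - 1)), 2 ** (bitwidth - 1) - 1)
    # second ApplyAlpha: undo the scaling, q*scale - zeropt*scale
    out = (q - zeropt) * scale

The last line is the familiar quantize/dequantize result; later passes then try to push
the second (unscaling) ApplyAlpha further down the graph so it can be absorbed.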
+''' +from copy import deepcopy +import numpy as np +from hls4ml.model.types import FixedPrecisionType +from hls4ml.model.layers import Quant, Constant +from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer +from hls4ml.model.optimizer import OptimizerPass + +class QuantConstantParameters(OptimizerPass): + """ Remove Constant from the Qaunt node parameters (but not input[0]) """ + def match(self, node): + is_match = (isinstance(node, Quant) + and ((node.get_input_node(node.inputs[1]) + and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or (node.get_input_node(node.inputs[2]) + and isinstance(node.get_input_node(node.inputs[2]), Constant)) + or (node.get_input_node(node.inputs[3]) + and isinstance(node.get_input_node(node.inputs[3]), Constant)))) + + return is_match + + def transform(self, model, node): + """ + Remove Constant from the Qaunt node parameters (but not input[0]) + """ + if node.get_input_node(node.inputs[1]): + scale_node = node.get_input_node(node.inputs[1]) + if isinstance(scale_node, Constant): + node.set_attr('scale', scale_node.value) + node.inputs[1] = '' + model.remove_node(scale_node, rewire=False) + + if node.get_input_node(node.inputs[2]): + zeropt_node = node.get_input_node(node.inputs[2]) + if isinstance(zeropt_node, Constant): + node.set_attr('zeropt', zeropt_node.value) + node.inputs[2] = '' + model.remove_node(zeropt_node, rewire=False) + + if node.get_input_node(node.inputs[3]): + bitwidth_node = node.get_input_node(node.inputs[3]) + if isinstance(bitwidth_node, Constant): + node.set_attr('bitwidth', bitwidth_node.value) + node.inputs[3] = '' + model.remove_node(bitwidth_node, rewire=False) + + return True + + +class QuantToActivation(OptimizerPass): + ''' + This is for the case when scale is 1 and zeropt is 0. It is a a 1:1 transformation of + a Quant to an Activation. + + As an optimization, this is not called when the input is constant. 
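    For reference: by the time this pass matches, QuantConstantParameters has already
    folded the Constant inputs, so the node carries 'scale', 'zeropt' and 'bitwidth'
    as attributes (alongside 'signed', 'narrow' and 'rounding_mode') and only
    input[0] is still wired.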
+ ''' + def match(self, node): + # only matches after the other inputs are already folded + is_match = (isinstance(node, Quant) + and not isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3])) + + # Only match if the scale is 1s and the zero-point is 0s + if is_match: # to make sure this is a quant node with inputs + input_shape = node.get_input_variable().shape + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + is_match = is_match and (scale == np.ones_like(scale)).all() + is_match = is_match and (bias == np.zeros_like(bias)).all() + return is_match + + def transform(self, model, node): + ''' + Change quant node to Activation + ''' + input_shape = node.get_input_variable().shape + + n_in = np.prod(input_shape) + + rounding_mode = node.get_attr("rounding_mode") + narrow = node.get_attr("narrow") + signed = node.get_attr("signed") + bitwidth = node.get_attr("bitwidth") + + precision, quantizer = _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode) + + attributes = node.attributes + attributes.update({ + 'activation' : 'linear', + 'quant_precision' : precision, + 'quantizer' : quantizer, + 'n_in' : n_in + }) + + new_node = model.make_node('Activation', f'{node.name}_act', + attributes, [node.inputs[0]], [x for x in node.outputs]) + new_node.get_output_variable().type.precision = precision + model.replace_node(node, new_node) + + return True + + +class FuseQuantWithConstant(OptimizerPass): + ''' + This is for the case when scale is 1 and zeropt is 0. It directly applies the quantization to a constant. + ''' + def match(self, node): + # only matches after the other inputs are already folded + is_match = (isinstance(node, Quant) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3])) + + # Only match if the scale is 1s and the zero-point is 0s + if is_match: # to make sure this is a quant node with inputs + input_shape = node.get_input_variable().shape + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + is_match = is_match and (scale == np.ones_like(scale)).all() + is_match = is_match and (bias == np.zeros_like(bias)).all() + return is_match + + def transform(self, model, node): + ''' + Fuse Quant with Constant. + ''' + + rounding_mode = node.get_attr("rounding_mode") + narrow = node.get_attr("narrow") + signed = node.get_attr("signed") + bitwidth = node.get_attr("bitwidth") + + precision, quantizer = _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode) + + const_node = node.get_input_node(node.inputs[0]) + const_node.set_attr("quant_precision", precision) + const_node.set_attr("quantizer", quantizer) + + # reinitialize (which also runs quantization if quantizer exists) + const_node.initialize() + + # remove the Quant node + model.remove_node(node, rewire=True) + + return True + + +class QuantToAlphaActivationAlpha(OptimizerPass): + ''' + This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of + a Quant to an ApplyAlpha (to scale), Activatio, ApplyAlpho (to rescale). + + As an optimization, this is not called when the input is constant. 
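    In terms of the attributes set in transform() below, with Quant scale s and
    zero point z, the inserted nodes are:

        scale_node:    scale = 1/s, bias = z        (x -> x/s + z)
        rescale_node:  scale = s,   bias = -z*s     (q -> (q - z)*s)

    with the quantizing linear Activation sitting between them.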
+ ''' + def match(self, node): + # only matches after the other inputs are already folded + is_match = (isinstance(node, Quant) + and not isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3])) + + if is_match: # to make sure this is a quant node with inputs + input_shape = node.get_input_variable().shape + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any()) + return is_match + + def transform(self, model, node): + ''' + Change quant node to ApplyAlhpa, Activation, ApplyAlpha + ''' + + # Do the Activation as in the simple case + + input_shape = node.get_input_variable().shape + + n_in = np.prod(input_shape) + + rounding_mode = node.get_attr("rounding_mode") + narrow = node.get_attr("narrow") + signed = node.get_attr("signed") + bitwidth = node.get_attr("bitwidth") + + precision, quantizer = _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode) + + attributes = deepcopy(node.attributes) + attributes.update({ + 'activation' : 'linear', + 'quant_precision' : precision, + 'quantizer' : quantizer, + 'n_in' : n_in + }) + + new_node = model.make_node('Activation', f'{node.name}_act', + attributes, [node.inputs[0]], [x for x in node.outputs]) + new_node.get_output_variable().type.precision = precision + model.replace_node(node, new_node) + + # but now add the ApplyAlhpas before and after + + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + + attributes_scale = node.attributes + attributes_scale.update({ + 'n_in': n_in, + 'n_out': n_in, + 'n_filt': -1 + }) + + attributes_rescale = deepcopy(attributes_scale) + + scale_node = model.make_node('ApplyAlpha', node.name + '_scale', attributes_scale, [x for x in node.inputs]) + firstscale = 1/scale + firstbias = bias + scale_node.set_attr("scale", firstscale) + scale_node.set_attr("bias", firstbias) + scale_node.add_weights(firstscale) + scale_node.add_bias(firstbias) + model.insert_node(scale_node) + + rescale_node = model.make_node('ApplyAlpha', node.name + '_rescale', attributes_rescale, [x for x in new_node.outputs]) + rescale = scale + rebias = -bias*scale + rescale_node.set_attr("scale", rescale) + rescale_node.set_attr("bias", rebias) + rescale_node.add_weights(rescale) + rescale_node.add_bias(rebias) + model.insert_node(rescale_node) + + return True + + +class ConstQuantToConstAlpha(OptimizerPass): + ''' + This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of + a Quant to an ApplyAlpha (to scale), Activation, ApplyAlpho (to unscale), but an input + consts allows for optimization, so the ApplyAlpha (to scale), Activation are + optimized away right away. 
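    A small illustrative case (values not taken from the code): with a constant
    w = [0.5, -0.25], scale = 0.25 and zeropt = 0, the stored constant becomes the
    integer-valued w / 0.25 = [2., -1.] (then quantized on initialize), and a single
    ApplyAlpha with scale 0.25 and bias 0 is left behind to undo the scaling.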
+ ''' + def match(self, node): + # only matches after the other inputs are already folded + is_match = (isinstance(node, Quant) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3])) + + if is_match: # to make sure this is a quant node with inputs + input_shape = node.get_input_variable().shape + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any()) + return is_match + + def transform(self, model, node): + ''' + Change Constant + Quant node to Constant, ApplyAlpha + ''' + + # Do the Activation as in the simple case + + input_shape = node.get_input_variable().shape + + n_in = np.prod(input_shape) + + rounding_mode = node.get_attr("rounding_mode") + narrow = node.get_attr("narrow") + signed = node.get_attr("signed") + bitwidth = node.get_attr("bitwidth") + + precision, quantizer = _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode) + + const_node = node.get_input_node(node.inputs[0]) + + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + + # caclucate the new value + new_val = const_node.value / scale + bias + const_node.set_attr('value', new_val) + const_node.set_attr("quant_precision", precision) + const_node.set_attr("quantizer", quantizer) + + # reinitialize (which also runs quantization if quantizer exists) + const_node.initialize() + + attributes_rescale = node.attributes + attributes_rescale.update({ + 'n_in': n_in, + 'n_out': n_in, + 'n_filt': -1 + }) + + rescale_node = model.make_node('ApplyAlpha', node.name + '_rescale', attributes_rescale, + [x for x in node.inputs], [x for x in node.outputs]) + rescale = scale + rebias = -bias*scale + rescale_node.set_attr("scale", rescale) + rescale_node.set_attr("bias", rebias) + rescale_node.add_weights(rescale) + rescale_node.add_bias(rebias) + model.replace_node(node, rescale_node) + + return True + + +def _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode): + ''' + A function to determine the precision and quantizer + ''' + if rounding_mode == "ROUND": + bn_round = "AP_RND_CONV" + elif rounding_mode == "FLOOR": + bn_round = "AP_TRN" + else: + raise NotImplementedError(f"Rounding mode {rounding_mode} not supported in Quant node. 
Only ROUND and FLOOR supported.") + + if narrow and not signed: + raise NotImplementedError("Narrow mode is only supported for singed numbers.") + + if narrow: + bn_sat = "AP_SAT_SYM" + else: + bn_sat = "AP_SAT" + + if np.squeeze(bitwidth).shape: + raise RuntimeError("Only scalar bitwidth values are supporeted by the Quant node") + bitwidth = int(bitwidth) + + precision = FixedPrecisionType(bitwidth, bitwidth, signed, bn_round, bn_sat) + quantizer = QuantNodeQuantizer(precision) + return (precision, quantizer) + diff --git a/hls4ml/model/optimizer/passes/reshape_const.py b/hls4ml/model/optimizer/passes/reshape_const.py new file mode 100644 index 0000000000..3e4ef82a1b --- /dev/null +++ b/hls4ml/model/optimizer/passes/reshape_const.py @@ -0,0 +1,23 @@ +import numpy as np +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.layers import Constant, Reshape + +class ReshapeConstant(OptimizerPass): + """ Remove Constant from new shape input """ + def match(self, node): + is_match = (isinstance(node, Reshape) + and len(node.inputs) > 1 + and node.get_input_node(node.inputs[1])) + + return is_match + + def transform(self, model, node): + """ + Remove Constant from new shape input. Note, input shape node is already used on initialize + """ + shape_node = node.get_input_node(node.inputs[1]) + if not isinstance(shape_node, Constant): + raise "Nonconstant shape inputs are not currently suppoerted" + model.remove_node(shape_node, rewire=False) + + return True \ No newline at end of file diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py new file mode 100755 index 0000000000..5c435ec6d2 --- /dev/null +++ b/test/pytest/test_qonnx.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python +import pytest +import hls4ml +import numpy as np +import qonnx.util.cleanup +import qonnx.util.to_channels_last +import urllib +import os +# To conveniently run QONNX inference +from finn.core.modelwrapper import ModelWrapper +import finn.core.onnx_exec as oxe + +def test_tfc_2w2a(): + # download test model + dl_dir = "./" + dl_file = dl_dir + "qonnx-tfc-2w2a.onnx" + tfc_w2a2_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/MNIST/Brevitas_FINN_TFC/TFC/TFC_2W2A.onnx" + ) + urllib.request.urlretrieve(tfc_w2a2_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_file = dl_dir + "/qonnx-tfc-2w2a-clean.onnx" + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1,1,28,28) + np.random.seed(0) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + config['LayerName'] = {} + config['LayerName']['global_in'] = {'Precision' : 'ap_fixed<16,2>'} + config['LayerName']['Dense_MatMul_0'] = {'Precision' : {'accum' : 'ap_int<10>', + 'result' : 'ap_int<10>'}} + config['LayerName']['Dense_MatMul_1'] = {'Precision' : {'accum' : 'ap_int<10>', + 'result' : 'ap_int<10>'}} + config['LayerName']['Dense_MatMul_2'] = {'Precision' : {'accum' : 'ap_int<10>', + 'result' : 'ap_int<10>'}} + config['LayerName']['Dense_MatMul_3'] = {'Precision' : {'accum' : 'ap_int<10>', + 
'result' : 'ap_int<10>'}} + hls_model = hls4ml.converters.convert_from_onnx_model(model, + output_dir='hls4mlprj_qonnx_tfc-2w2a', + part='xcu250-figd2104-2L-e', + hls_config=config) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + +def test_cnv_2w2a(): + # download test model + dl_dir = "./" + dl_file = dl_dir + "qonnx-cnv-2w2a.onnx" + cnv_w2a2_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/CIFAR10/Brevitas_FINN_CNV/CNV_2W2A.onnx" + ) + urllib.request.urlretrieve(cnv_w2a2_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_clean = dl_dir + "/qonnx-cnv-2w2a-clean.onnx" + out_chanlast = dl_dir + "/qonnx-cnv-2w2a-clean-channels-last.onnx" + out_file = dl_dir + "/qonnx-cnv-2w2a-clean-channels-last-clean.onnx" + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_clean) + qonnx.util.to_channels_last.to_channels_last(out_clean, make_input_channels_last=True, out_file=out_chanlast) + qonnx.util.cleanup.cleanup(out_chanlast, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1,32,32,3) + np.random.seed(1) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + config['Model']['Precision'] = 'ap_fixed<32,16>' + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + + hls_model = hls4ml.converters.convert_from_onnx_model(model, + output_dir='hls4mlprj_qonnx_cnv-2w2a', + part='xcu250-figd2104-2L-e', + io_type='io_stream', + hls_config=config) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + +if __name__ == '__main__': + test_tfc_2w2a() From 37eed5fc2eadef37dc9cf6c17cc93a8872dedc4d Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 20 Apr 2022 18:05:11 -0500 Subject: [PATCH 07/51] another snapshot, to try on another computer --- hls4ml/model/graph.py | 2 +- hls4ml/model/layers.py | 28 +++++--- hls4ml/model/optimizer/__init__.py | 18 ++++- .../model/optimizer/passes/batchnorm_opt.py | 6 +- .../optimizer/passes/matmul_const_to_dense.py | 66 ++++++++----------- hls4ml/model/optimizer/passes/quant_opt.py | 20 ++++++ 6 files changed, 89 insertions(+), 51 deletions(-) diff --git a/hls4ml/model/graph.py b/hls4ml/model/graph.py index 9555e528af..8434f27992 100644 --- a/hls4ml/model/graph.py +++ b/hls4ml/model/graph.py @@ -455,7 +455,7 @@ def remove_node(self, node, rewire=True): outputs = [outp for outp in node.outputs if outp] if len(inputs) > 1 or len(outputs) > 1: raise Exception('Cannot rewire a node with multiple inputs/outputs') - prev_node = self.graph.get(node.inputs[0]) + prev_node = node.get_input_node(node.inputs[0]) next_node = next((x for x in self.graph.values() if node.outputs[0] in x.inputs), None) if prev_node is not None: if next_node is not None: diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 7328e00f2b..6dd9bd2256 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -113,14 +113,16 @@ def _validate_attributes(self): raise Exception('Attribute "{}" of layer {} ({}) not set and no default value is 
specified.'.format(attr_name, self.name, self.class_name)) def get_input_node(self, input_name=None): - if input_name is not None: - nodes = [node for node in self.model.graph.values() if input_name in node.outputs] - if len(nodes) == 0: - return None + if input_name is None: + if len(self.inputs > 0): + input_name = self.inputs[0] else: - return nodes[0] + return None + nodes = [node for node in self.model.graph.values() if input_name in node.outputs] + if len(nodes) == 0: + return None else: - return self.model.graph.get(self.inputs[0]) + return nodes[0] def get_input_variable(self, input_name=None): if input_name is not None: @@ -131,7 +133,7 @@ def get_input_variable(self, input_name=None): def get_output_nodes(self, output_name=None): if output_name is None: output_name = self.outputs[0] - return [node for node in self.model.graph.values() if node.inputs[0] == output_name] + return [node for node in self.model.graph.values() if output_name in node.inputs] def get_output_variable(self, output_name=None): if output_name is not None: @@ -809,7 +811,7 @@ def initialize(self): dims = inp.dim_names self.add_output_variable(shape, dims) - if not self.get_attr('scale'): + if self.get_attr('scale') is None: gamma = self.model.get_weights_data(self.name, 'gamma') beta = self.model.get_weights_data(self.name, 'beta') mean = self.model.get_weights_data(self.name, 'moving_mean') @@ -820,6 +822,16 @@ def initialize(self): self.add_weights_variable(name='scale', var_name='s{index}', data=scale) self.add_weights_variable(name='bias', var_name='b{index}', data=bias) + elif isinstance(self.get_attr('scale'), np.ndarray): + self.add_weights_variable('scale', var_name='s{index}', + data=self.get_attr('scale'), + precision=self.get_attr("scale_precision"), + quantizer=self.get_attr("bias_quantizer")) + self.add_weights_variable('bias', var_name='b{index}', + data=self.get_attr('bias'), + precision=self.get_attr("bias_precision"), + quantizer=self.get_attr("bias_quantizer")) + class ApplyAlpha(BatchNormalization): ''' A custom layer to scale the output of a QDense layer which used 'alpha != 1' diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 94865e790a..0a8535bc9e 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -31,7 +31,8 @@ 'scale_down_conv', 'merge_to_batch_normalization', 'merge_to_batch_normalization_div', - + 'matmul_const_to_dense', + 'conv_to_conv_x_d', ] try: @@ -39,10 +40,21 @@ # TODO Maybe not all QKeras optmizers belong here? 
register_flow('convert', base_convert + ['output_rounding_saturation_mode', 'qkeras_factorize_alpha', 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization']) - register_flow('optimize', ['eliminate_linear_activation', 'fuse_consecutive_batch_normalization', 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv'], requires=['convert']) + register_flow('optimize', [ + 'eliminate_linear_activation', + 'fuse_consecutive_batch_normalization', + 'fuse_batch_normalization', + 'replace_multidimensional_dense_with_conv', + 'eliminate_linear_activation_quant' + ], requires=['convert']) except: register_flow('convert', base_convert) - register_flow('optimize', ['eliminate_linear_activation', 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv'], requires=['convert']) + register_flow('optimize', [ + 'eliminate_linear_activation', + 'fuse_batch_normalization', + 'replace_multidimensional_dense_with_conv', + 'eliminate_linear_activation_quant' + ], requires=['convert']) del opt_path del module_path diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index d1c7154c59..3745d86862 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -52,8 +52,10 @@ def transform(self, model, node): scale = gamma / np.sqrt(moving_variance + node.get_attr('epsilon')) bias = beta - gamma * moving_mean / np.sqrt(moving_variance + node.get_attr('epsilon')) - node.add_weights_variable("scale", data=scale, precision=node.get_attr("scale_precision"), quantizer=node.get_attr("bias_quantizer")) - node.add_weights_variable("bias", data=bias, precision=node.get_attr("bias_precision"), quantizer=node.get_attr("bias_quantizer")) + node.set_attr("scale", scale) + node.set_attr("bias", bias) + #node.add_weights_variable("scale", data=scale, precision=node.get_attr("scale_precision"), quantizer=node.get_attr("bias_quantizer")) + #node.add_weights_variable("bias", data=bias, precision=node.get_attr("bias_precision"), quantizer=node.get_attr("bias_quantizer")) new_node = model.make_node(BatchNormalization, node.name, node.attributes, [node.inputs[0]], [x for x in node.outputs]) diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 7798ee7471..5311246e8a 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -1,67 +1,59 @@ import numpy as np from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import FixedPrecisionType +from hls4ml.model.types import IntegerPrecisionType from hls4ml.model.layers import MatMul, Constant, Dense +from hls4ml.model.optimizer.passes.quant_opt import propagete_type_mult class MatmulConstToDense(OptimizerPass): - """ Convert MatMul with constant to a dense layer """ + """ + Convert MatMul with constant to a dense layer. Note, this only supports the second input + being the constant. If needed, one could add transposes to make that be the case in + other yet to be written optimizers. 
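    The result/accumulator precision follows the propagete_type_mult rule used below:
    width = weight width + input width + ceil(log2(n_in)), with rounding and
    saturation modes copied from the input.  As an assumed example, 4-bit weights
    multiplied by 8-bit activations and summed over n_in = 64 give a
    4 + 8 + 6 = 18-bit result.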
+ """ def match(self, node): is_match = (isinstance(node, MatMul) and len(node.inputs) == 2 - and (isinstance(node.get_input_node(node.inputs[0]), Constant) - or isinstance(node.get_input_node(node.inputs[1]), Constant))) + and isinstance(node.get_input_node(node.inputs[1]), Constant)) return is_match def transform(self, model, node): """ Substitute Matmul + Constant for a single dense """ #determining Constant layer input - matmul_node = node - const_node = None - const_inp_idx = 0 - other_inp_idx = 1 - if isinstance(matmul_node.get_input_node(matmul_node.inputs[0]), Constant): - const_node = matmul_node.get_input_node(matmul_node.inputs[0]) - other_node = matmul_node.get_input_node(matmul_node.inputs[1]) - else: - const_node = matmul_node.get_input_node(matmul_node.inputs[1]) - other_node = matmul_node.get_input_node(matmul_node.inputs[0]) - const_inp_idx = 1 - other_inp_idx = 0 + const_node = node.get_input_node(node.inputs[1]) + other_node = node.get_input_node(node.inputs[0]) + other_var = node.get_input_variable(node.inputs[0]) quant_precision = None weight_precision = const_node.get_attr("quant_precision") + weight_quantizer = const_node.get_attr("quantizer") other_precision = other_node.get_attr("quant_precision") - if weight_precision and other_precision: - if (weight_precision.width != weight_precision.integer - or other_precision.width != other_precision.integer): - raise ValueError("quant_precisions must always have the same width and integer parameters") + in_shape = other_var.shape + node.set_attr('n_in', np.prod(in_shape)) + out_shape = list(in_shape[:-1]) + [const_node.value.shape[-1]] + node.set_attr('n_out', np.prod(out_shape)) - Nacc = matmul_node.get_input_variable(matmul_node.inputs[0]).shape[-1] - bitwidth = weight_precision.width + other_precision.width + int(np.ceil(np.log2(Nacc))) - signed = weight_precision.signed or other_precision.signed - # copy staruation and rounding from "other" - rounding_mode = other_precision.rounding_mode - saturation_mode = other_precision.saturation_mode - quant_precision = FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) + quant_precision = propagete_type_mult(other_precision, weight_precision, in_shape[-1]) + + node.add_weights_variable(name='weight', var_name='w{index}', data=const_node.value, + precision=weight_precision, quantizer=weight_quantizer) + # add a dummy bias + # (A real one can be added after with bn_fuse) + node.add_weights_variable(name='bias', var_name='b{index}', data=np.zeros(out_shape), + precision=IntegerPrecisionType(1, False)) #creating the attributes - attributes = matmul_node.attributes - attributes.update({ - "weight_data": const_node.value, + node.attributes.update({ "weight_precision": weight_precision, - "weight_quantizer": const_node.get_attr("quantizer"), + "weight_quantizer": weight_quantizer, "quant_precision": quant_precision, - "omit_bias": True, - "n_in": const_node.value.shape[0], - "n_out": const_node.value.shape[1] }) #making new node - new_dense = model.make_node(Dense, f"Dense_{matmul_node.name}", attributes, - [matmul_node.inputs[other_inp_idx]], [x for x in matmul_node.outputs]) + new_dense = model.make_node(Dense, f"Dense_{node.name}", node.attributes, + [node.inputs[0]], [x for x in node.outputs]) #removing and replacing old nodes model.remove_node(const_node, rewire=False) - model.replace_node(matmul_node, new_dense) + model.replace_node(node, new_dense) return True diff --git a/hls4ml/model/optimizer/passes/quant_opt.py 
b/hls4ml/model/optimizer/passes/quant_opt.py index 465e073a5c..7a5bcc0e0e 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -17,6 +17,7 @@ from hls4ml.model.layers import Quant, Constant from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer from hls4ml.model.optimizer import OptimizerPass +from numbers import Integral class QuantConstantParameters(OptimizerPass): """ Remove Constant from the Qaunt node parameters (but not input[0]) """ @@ -349,3 +350,22 @@ def _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode): quantizer = QuantNodeQuantizer(precision) return (precision, quantizer) + +def propagete_type_mult(in1: FixedPrecisionType, in2: FixedPrecisionType, num_acc: Integral): + ''' + Propagate the precion type across a multiply. Currently only "quant_precision" types (with no fractional bits) + are supported. Rounding modes are propagated from in1 + ''' + if in2 and in1: + if (in2.width != in2.integer + or in1.width != in1.integer): + raise ValueError("quant_precisions must always have the same width and integer parameters") + + bitwidth = in2.width + in1.width + int(np.ceil(np.log2(num_acc))) + signed = in2.signed or in1.signed + # copy staruation and rounding from "in1" + rounding_mode = in1.rounding_mode + saturation_mode = in1.saturation_mode + return FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) + else: + return None From 71228e717c91955e9edf5dc19e4632891b98a7ce Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sat, 23 Apr 2022 15:37:22 -0500 Subject: [PATCH 08/51] fix parsing of dense --- hls4ml/converters/onnx/quantizer.py | 16 ++++++++-------- hls4ml/model/layers.py | 7 ++++--- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/hls4ml/converters/onnx/quantizer.py b/hls4ml/converters/onnx/quantizer.py index a078aae07f..e2ca9d9814 100644 --- a/hls4ml/converters/onnx/quantizer.py +++ b/hls4ml/converters/onnx/quantizer.py @@ -6,7 +6,7 @@ """ import numpy as np -from hls4ml.model.types import Quantizer +from hls4ml.model.types import Quantizer, SaturationMode, RoundingMode class QuantNodeQuantizer(Quantizer): """ This implements a quantizer for a FixedPrecisionType with width==integer""" @@ -46,9 +46,9 @@ def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) int(0) """ - if saturation_mode not in ("AP_SAT_SYM", "AP_SAT"): + if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT): raise ValueError(f"Saturation mode {saturation_mode} not supported. 
Only AP_SAT_SYM, AP_SAT supported") - if signed and saturation_mode == "AP_SAT_SYM": + if signed and saturation_mode == SaturationMode.SAT_SYM: value = -(2 ** (bit_width - 1)) + 1 elif signed: value = -(2 ** (bit_width - 1)) @@ -79,14 +79,14 @@ def _max_int(signed: bool, bit_width: int) -> int: return value @staticmethod - def _resolve_rounding_mode(mode_string): - """Resolve the rounding mode string of Quant and Trunc ops + def _resolve_rounding_mode(mode): + """Resolve the rounding mode of Quant and Trunc ops to the corresponding numpy functions.""" - if mode_string == "AP_RND_CONV": + if mode == RoundingMode.RND_CONV: return np.round # elif mode_string == "CEIL": # not supported # return np.ceil - elif mode_string == "AP_TRN": + elif mode == RoundingMode.TRN: return np.floor else: - raise ValueError(f"Could not resolve rounding mode called: {mode_string}") + raise ValueError(f"Rounding mode {mode} not supported.") diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 6dd9bd2256..94b9395522 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -114,7 +114,7 @@ def _validate_attributes(self): def get_input_node(self, input_name=None): if input_name is None: - if len(self.inputs > 0): + if len(self.inputs) > 0: input_name = self.inputs[0] else: return None @@ -357,8 +357,9 @@ def initialize(self): else: dims = ['N_LAYER_{}'.format(self.index)] self.add_output_variable(shape, dims) - self.add_weights(quantizer=self.get_attr('weight_quantizer'), compression=self.model.config.get_compression(self)) - self.add_bias(quantizer=self.get_attr('bias_quantizer')) + if self.get_attr("weight") is None: + self.add_weights(quantizer=self.get_attr('weight_quantizer'), compression=self.model.config.get_compression(self)) + self.add_bias(quantizer=self.get_attr('bias_quantizer')) class Conv(Layer): """ From 5c8c9f79af88d78cafaec531eecb71a91a290619 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 28 Apr 2022 11:11:17 -0500 Subject: [PATCH 09/51] another snapshot, before updating normalize setup --- hls4ml/model/layers.py | 7 ++++++- hls4ml/model/optimizer/__init__.py | 20 +++++++++---------- .../optimizer/passes/matmul_const_to_dense.py | 8 +++++++- hls4ml/model/optimizer/passes/nop.py | 9 ++++++--- hls4ml/model/types.py | 1 + 5 files changed, 30 insertions(+), 15 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 94b9395522..53ef951445 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -161,6 +161,11 @@ def add_output_variable(self, shape, dim_names, out_name=None, var_name='layer{i self.set_attr(out_name, out) + def update_output_precision(self, precision, output_name=None): + if output_name is None: + output_name = self.outputs[0] + self.variables[output_name].type.precision = precision + def add_weights(self, quantizer=None, compression=False): data = self.model.get_weights_data(self.name, 'kernel') @@ -356,7 +361,7 @@ def initialize(self): dims = ['N_LAYER_{}_{}'.format(i, self.index) for i in range(1, len(shape) + 1)] else: dims = ['N_LAYER_{}'.format(self.index)] - self.add_output_variable(shape, dims) + self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision")) if self.get_attr("weight") is None: self.add_weights(quantizer=self.get_attr('weight_quantizer'), compression=self.model.config.get_compression(self)) self.add_bias(quantizer=self.get_attr('bias_quantizer')) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 0a8535bc9e..2ee66f3483 100644 --- 
a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -14,7 +14,7 @@ base_convert = [ - 'fuse_bias_add', + 'fuse_bias_add', 'remove_useless_transpose', 'reshape_constant', 'quant_constant_parameters', @@ -38,22 +38,22 @@ try: import qkeras # TODO Maybe not all QKeras optmizers belong here? - register_flow('convert', base_convert - + ['output_rounding_saturation_mode', 'qkeras_factorize_alpha', 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization']) + register_flow('convert', base_convert + + ['output_rounding_saturation_mode', 'qkeras_factorize_alpha', 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization']) register_flow('optimize', [ - 'eliminate_linear_activation', - 'fuse_consecutive_batch_normalization', - 'fuse_batch_normalization', + 'fuse_consecutive_batch_normalization', + 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv', - 'eliminate_linear_activation_quant' + 'eliminate_linear_activation_quant', + 'eliminate_linear_activation', ], requires=['convert']) except: register_flow('convert', base_convert) register_flow('optimize', [ - 'eliminate_linear_activation', - 'fuse_batch_normalization', + 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv', - 'eliminate_linear_activation_quant' + 'eliminate_linear_activation_quant', + 'eliminate_linear_activation', ], requires=['convert']) del opt_path diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 5311246e8a..0baa419b20 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -1,6 +1,6 @@ import numpy as np from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import IntegerPrecisionType +from hls4ml.model.types import IntegerPrecisionType, NamedType from hls4ml.model.layers import MatMul, Constant, Dense from hls4ml.model.optimizer.passes.quant_opt import propagete_type_mult @@ -32,6 +32,8 @@ def transform(self, model, node): out_shape = list(in_shape[:-1]) + [const_node.value.shape[-1]] node.set_attr('n_out', np.prod(out_shape)) + node.set_attr('trace', True) + quant_precision = propagete_type_mult(other_precision, weight_precision, in_shape[-1]) node.add_weights_variable(name='weight', var_name='w{index}', data=const_node.value, @@ -52,6 +54,10 @@ def transform(self, model, node): new_dense = model.make_node(Dense, f"Dense_{node.name}", node.attributes, [node.inputs[0]], [x for x in node.outputs]) + if quant_precision: + accum_t = NamedType('layer{}_accum_t'.format(new_dense.index), quant_precision) + new_dense.set_attr('accum_t', accum_t) + #removing and replacing old nodes model.remove_node(const_node, rewire=False) model.replace_node(node, new_dense) diff --git a/hls4ml/model/optimizer/passes/nop.py b/hls4ml/model/optimizer/passes/nop.py index 05104b1e2a..202e857ce2 100644 --- a/hls4ml/model/optimizer/passes/nop.py +++ b/hls4ml/model/optimizer/passes/nop.py @@ -1,11 +1,14 @@ from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.layers import Activation class EliminateLinearActivation(OptimizerPass): def match(self, node): cast = False - if node.__class__.__name__ == 'Activation': + if isinstance(node, Activation): cast = node.get_input_variable().type.precision != node.get_output_variable().type.precision - return node.__class__.__name__ == 'Activation' and node.get_attr('activation') == 'linear' and not cast + return node.get_attr('activation') == 
'linear' and not cast + else: + return False def transform(self, model, node): model.remove_node(node) @@ -20,7 +23,7 @@ def match(self, node): ''' Only match if this activation is from quant node and previous node precision is not set by a quant node already. ''' - is_match = (node.__class__.__name__ == 'Activation' and node.get_attr('activation') == 'linear' + is_match = (isinstance(node, Activation) and node.get_attr('activation') == 'linear' and node.get_attr("quant_precision") and not node.get_input_node(node.inputs[0]).get_attr("quant_precision")) return is_match diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index 284009f4f1..a42903ff16 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -215,6 +215,7 @@ def __init__(self, tv, input_var): and the input_var variable it should link to. ''' self.__dict__.update(tv.__dict__) + self.type = input_var.type self.input_var = input_var class WeightVariable(Variable): From b18a9951ba495647bb018ae7c78c59e8effdfcfd Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 28 Apr 2022 12:02:01 -0500 Subject: [PATCH 10/51] Make the size of bn scale and bias what they really are --- hls4ml/backends/vivado/passes/core_templates.py | 2 ++ hls4ml/backends/vivado/passes/quantization_templates.py | 1 + hls4ml/templates/vivado/nnet_utils/nnet_batchnorm.h | 5 +++-- hls4ml/templates/vivado/nnet_utils/nnet_batchnorm_stream.h | 4 ++-- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hls4ml/backends/vivado/passes/core_templates.py b/hls4ml/backends/vivado/passes/core_templates.py index 201562f7fb..7345ed138f 100644 --- a/hls4ml/backends/vivado/passes/core_templates.py +++ b/hls4ml/backends/vivado/passes/core_templates.py @@ -57,6 +57,7 @@ def format(self, node): batchnorm_config_template = """struct config{index} : nnet::batchnorm_config {{ static const unsigned n_in = {n_in}; static const unsigned n_filt = {n_filt}; + static const unsigned n_scale_bias = {n_scale_bias}; static const unsigned io_type = nnet::{iotype}; static const unsigned reuse_factor = {reuse}; static const bool store_weights_in_bram = false; @@ -78,6 +79,7 @@ def __init__(self): def format(self, node): params = self._default_config_params(node) params['n_in'] = node.get_input_variable().size_cpp() + params['n_scale_bias'] = params['n_in'] if params['n_filt'] == -1 else params['n_filt'] params['product_type'] = get_backend('vivado').product_type(node.get_input_variable().type.precision, node.get_weights('scale').type.precision) return self.template.format(**params) diff --git a/hls4ml/backends/vivado/passes/quantization_templates.py b/hls4ml/backends/vivado/passes/quantization_templates.py index fa61b81ff7..2b7027f793 100644 --- a/hls4ml/backends/vivado/passes/quantization_templates.py +++ b/hls4ml/backends/vivado/passes/quantization_templates.py @@ -11,6 +11,7 @@ def __init__(self): def format(self, node): params = self._default_config_params(node) params['n_in'] = node.get_input_variable().size_cpp() + params['n_scale_bias'] = params['n_in'] if params['n_filt'] == -1 else params['n_filt'] params['product_type'] = get_backend('vivado').product_type(node.get_input_variable().type.precision, node.get_weights('scale').type.precision) return self.template.format(**params) diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_batchnorm.h b/hls4ml/templates/vivado/nnet_utils/nnet_batchnorm.h index edc6ff3205..655ac57fe0 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_batchnorm.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_batchnorm.h @@ -36,6 +36,7 @@ 
struct batchnorm_config // Layer Sizes static const unsigned n_in = 10; static const unsigned n_filt = -1; + static const unsigned n_scale_bias = 10; // Resource reuse info static const unsigned io_type = io_parallel; @@ -51,8 +52,8 @@ template void normalize( data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in], - typename CONFIG_T::scale_t scale[CONFIG_T::n_in], - typename CONFIG_T::bias_t bias[CONFIG_T::n_in] + typename CONFIG_T::scale_t scale[CONFIG_T::n_scale_bias], + typename CONFIG_T::bias_t bias[CONFIG_T::n_scale_bias] ) { data_T cache; diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_batchnorm_stream.h b/hls4ml/templates/vivado/nnet_utils/nnet_batchnorm_stream.h index 826bdafe9a..ce76c01bc3 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_batchnorm_stream.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_batchnorm_stream.h @@ -35,8 +35,8 @@ template void normalize( hls::stream &data, hls::stream &res, - typename CONFIG_T::scale_t scale[CONFIG_T::n_in], - typename CONFIG_T::bias_t bias[CONFIG_T::n_in] + typename CONFIG_T::scale_t scale[CONFIG_T::n_scale_bias], + typename CONFIG_T::bias_t bias[CONFIG_T::n_scale_bias] ) { #pragma HLS ARRAY_PARTITION variable=scale complete #pragma HLS ARRAY_PARTITION variable=bias complete From af4f66ddc9d9e4208556a007f0071bc1dd018a63 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 28 Apr 2022 12:11:22 -0500 Subject: [PATCH 11/51] don't override infered types in test --- test/pytest/test_qonnx.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 5c435ec6d2..6ac9f9720f 100755 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -40,14 +40,6 @@ def test_tfc_2w2a(): # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation config['LayerName'] = {} config['LayerName']['global_in'] = {'Precision' : 'ap_fixed<16,2>'} - config['LayerName']['Dense_MatMul_0'] = {'Precision' : {'accum' : 'ap_int<10>', - 'result' : 'ap_int<10>'}} - config['LayerName']['Dense_MatMul_1'] = {'Precision' : {'accum' : 'ap_int<10>', - 'result' : 'ap_int<10>'}} - config['LayerName']['Dense_MatMul_2'] = {'Precision' : {'accum' : 'ap_int<10>', - 'result' : 'ap_int<10>'}} - config['LayerName']['Dense_MatMul_3'] = {'Precision' : {'accum' : 'ap_int<10>', - 'result' : 'ap_int<10>'}} hls_model = hls4ml.converters.convert_from_onnx_model(model, output_dir='hls4mlprj_qonnx_tfc-2w2a', part='xcu250-figd2104-2L-e', From f536faeab5e9ea10504d14e2ebeb9450af89c74f Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 28 Apr 2022 13:03:39 -0500 Subject: [PATCH 12/51] make n_scale_bias not be a python parameter --- hls4ml/backends/vivado/passes/core_templates.py | 3 +-- hls4ml/backends/vivado/passes/quantization_templates.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/hls4ml/backends/vivado/passes/core_templates.py b/hls4ml/backends/vivado/passes/core_templates.py index 7345ed138f..a2d4cb49e8 100644 --- a/hls4ml/backends/vivado/passes/core_templates.py +++ b/hls4ml/backends/vivado/passes/core_templates.py @@ -57,7 +57,7 @@ def format(self, node): batchnorm_config_template = """struct config{index} : nnet::batchnorm_config {{ static const unsigned n_in = {n_in}; static const unsigned n_filt = {n_filt}; - static const unsigned n_scale_bias = {n_scale_bias}; + static const unsigned n_scale_bias = (n_filt == -1) ? 
n_in : n_filt; static const unsigned io_type = nnet::{iotype}; static const unsigned reuse_factor = {reuse}; static const bool store_weights_in_bram = false; @@ -79,7 +79,6 @@ def __init__(self): def format(self, node): params = self._default_config_params(node) params['n_in'] = node.get_input_variable().size_cpp() - params['n_scale_bias'] = params['n_in'] if params['n_filt'] == -1 else params['n_filt'] params['product_type'] = get_backend('vivado').product_type(node.get_input_variable().type.precision, node.get_weights('scale').type.precision) return self.template.format(**params) diff --git a/hls4ml/backends/vivado/passes/quantization_templates.py b/hls4ml/backends/vivado/passes/quantization_templates.py index 2b7027f793..fa61b81ff7 100644 --- a/hls4ml/backends/vivado/passes/quantization_templates.py +++ b/hls4ml/backends/vivado/passes/quantization_templates.py @@ -11,7 +11,6 @@ def __init__(self): def format(self, node): params = self._default_config_params(node) params['n_in'] = node.get_input_variable().size_cpp() - params['n_scale_bias'] = params['n_in'] if params['n_filt'] == -1 else params['n_filt'] params['product_type'] = get_backend('vivado').product_type(node.get_input_variable().type.precision, node.get_weights('scale').type.precision) return self.template.format(**params) From 9f771a2740efe36c59a65f1916c410aaa736fb8c Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 28 Apr 2022 18:28:55 -0500 Subject: [PATCH 13/51] Add broadcast shape for batchnorm --- hls4ml/model/optimizer/__init__.py | 29 +++++++++---------- .../model/optimizer/passes/batchnorm_opt.py | 3 -- hls4ml/model/optimizer/passes/merge_const.py | 4 +-- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 2ee66f3483..2e50e6e67f 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -33,28 +33,27 @@ 'merge_to_batch_normalization_div', 'matmul_const_to_dense', 'conv_to_conv_x_d', - ] +] + +base_optimize = [ + 'fuse_batch_normalization', + 'replace_multidimensional_dense_with_conv', + 'eliminate_linear_activation_quant', + 'eliminate_linear_activation', + 'broadcast_weights_batch_normalization', +] try: import qkeras # TODO Maybe not all QKeras optmizers belong here? 
register_flow('convert', base_convert - + ['output_rounding_saturation_mode', 'qkeras_factorize_alpha', 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization']) - register_flow('optimize', [ - 'fuse_consecutive_batch_normalization', - 'fuse_batch_normalization', - 'replace_multidimensional_dense_with_conv', - 'eliminate_linear_activation_quant', - 'eliminate_linear_activation', - ], requires=['convert']) + + ['output_rounding_saturation_mode', 'qkeras_factorize_alpha', + 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization']) + register_flow('optimize', ['fuse_consecutive_batch_normalization'] + base_optimize, + requires=['convert']) except: register_flow('convert', base_convert) - register_flow('optimize', [ - 'fuse_batch_normalization', - 'replace_multidimensional_dense_with_conv', - 'eliminate_linear_activation_quant', - 'eliminate_linear_activation', - ], requires=['convert']) + register_flow('optimize', base_optimize, requires=['convert']) del opt_path del module_path diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 3745d86862..7b15676352 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -145,10 +145,7 @@ def transform(self, model, node): n_filt = node.get_attr('n_filt', -1) - n_scale_bias = np.prod(input_shape) if n_filt == -1 else n_filt - scale_bias_shape = input_shape if n_filt == -1 else (n_filt,) - node.set_attr("n_scale_bias", n_scale_bias) # Check shape, broadcast if needed. if scale.shape != scale_bias_shape: diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index aa0c6ac376..177308bfae 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -1,5 +1,5 @@ import numpy as np -from hls4ml.model.layers import Merge, Constant +from hls4ml.model.layers import Merge, Constant, BatchNormalization from hls4ml.model.optimizer import OptimizerPass @@ -111,7 +111,7 @@ def transform(self, model, node): "n_filt": -1 }) - bn_layer = model.make_node("BatchNormalization", f"bn_{node.name}", + bn_layer = model.make_node(BatchNormalization, f"bn_{node.name}", attributes, [node.inputs[input_node_idx]], [x for x in node.outputs]) From bef93d9f40ff97f5bea6e1d39f245aa498ae9658 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 28 Apr 2022 21:16:53 -0500 Subject: [PATCH 14/51] make shape comparison more robust --- hls4ml/model/optimizer/passes/batchnorm_opt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 7b15676352..c56251e320 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -148,12 +148,12 @@ def transform(self, model, node): scale_bias_shape = input_shape if n_filt == -1 else (n_filt,) # Check shape, broadcast if needed. 
- if scale.shape != scale_bias_shape: + if np.squeeze(scale).shape != tuple(scale_bias_shape): node.add_weights_variable(name='scale', data=np.broadcast_to(scale, scale_bias_shape), precision=node.get_attr("scale_precision"), quantizer=node.get_attr("scale_quantizer")) - if bias.shape != scale_bias_shape: + if np.squeeze(bias).shape != tuple(scale_bias_shape): node.add_weights_variable(name='bias', data=np.broadcast_to(bias, scale_bias_shape), precision=node.get_attr("bias_precision"), quantizer=node.get_attr("bias_quantizer")) From 26d8f672f6020340ea162ed8bb8f675419fd9182 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 29 Apr 2022 08:15:59 -0500 Subject: [PATCH 15/51] create BatchNormalization layer initializer for broadcast --- hls4ml/backends/vivado/vivado_backend.py | 24 +++++++++++++ hls4ml/model/optimizer/__init__.py | 1 - .../model/optimizer/passes/batchnorm_opt.py | 35 ------------------- 3 files changed, 24 insertions(+), 36 deletions(-) diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index 8113022da8..3d82683d2c 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -195,6 +195,30 @@ def init_depconv2d(self, layer): layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower()) + @layer_optimizer(BatchNormalization) + def init_batchnormalization(self, layer): + '''Broadcast weights and scale if needed''' + input_shape = layer.get_input_variable().shape + + scale = layer.weights['scale'].data_unquantized + bias = layer.weights['bias'].data_unquantized + + n_filt = layer.get_attr('n_filt', -1) + + scale_bias_shape = input_shape if n_filt == -1 else (n_filt,) + + # Check shape, broadcast if needed. Don't broadcast if a squeeze makes them match. 
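        # (Illustrative shapes, not from the code: with input_shape = (4, 4, 3) and
        #  n_filt = 3, scale_bias_shape is (3,); a scalar scale of shape () is
        #  broadcast to (3,), while a scale of shape (1, 1, 3) is left alone because
        #  np.squeeze already reduces it to (3,).)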
+ if scale.shape != tuple(scale_bias_shape) and np.squeeze(scale).shape != tuple(scale_bias_shape): + layer.add_weights_variable(name='scale', data=np.broadcast_to(scale, scale_bias_shape), + precision=layer.get_attr("scale_precision"), + quantizer=layer.get_attr("scale_quantizer")) + + if bias.shape != tuple(scale_bias_shape) and np.squeeze(bias).shape != tuple(scale_bias_shape): + layer.add_weights_variable(name='bias', data=np.broadcast_to(bias, scale_bias_shape), + precision=layer.get_attr("bias_precision"), + quantizer=layer.get_attr("bias_quantizer")) + + @layer_optimizer(Activation) def init_activation(self, layer): if 'table_t' not in layer.attributes: diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 2e50e6e67f..9f0fece48c 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -40,7 +40,6 @@ 'replace_multidimensional_dense_with_conv', 'eliminate_linear_activation_quant', 'eliminate_linear_activation', - 'broadcast_weights_batch_normalization', ] try: diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index c56251e320..11a84f56ed 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -125,38 +125,3 @@ def transform(self, model, node): model.remove_node(prev_node, rewire=True) return True - - -class BroadcastWeightsBatchNormalization(OptimizerPass): - ''' - The scale and bias need to be broadcast to appropriate size before systhesis - ''' - - def match(self, node): - return isinstance(node, BatchNormalization) - - - def transform(self, model, node): - - input_shape = node.get_input_variable().shape - - scale = node.weights['scale'].data_unquantized - bias = node.weights['bias'].data_unquantized - - n_filt = node.get_attr('n_filt', -1) - - scale_bias_shape = input_shape if n_filt == -1 else (n_filt,) - - # Check shape, broadcast if needed. 
- if np.squeeze(scale).shape != tuple(scale_bias_shape): - node.add_weights_variable(name='scale', data=np.broadcast_to(scale, scale_bias_shape), - precision=node.get_attr("scale_precision"), - quantizer=node.get_attr("scale_quantizer")) - - if np.squeeze(bias).shape != tuple(scale_bias_shape): - node.add_weights_variable(name='bias', data=np.broadcast_to(bias, scale_bias_shape), - precision=node.get_attr("bias_precision"), - quantizer=node.get_attr("bias_quantizer")) - - # I think there's no need to restart; also, it prevents an infinite loop - return False From 8b6a8df245559684d1148f18296db6c679070098 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 29 Apr 2022 11:09:43 -0500 Subject: [PATCH 16/51] snapshot, parse CNV, but incorrect result --- hls4ml/converters/onnx/convolution.py | 8 ++-- hls4ml/model/layers.py | 12 ++--- .../model/optimizer/passes/conv_to_convxd.py | 45 +++++++++---------- hls4ml/model/optimizer/passes/quant_opt.py | 28 ++++++++++++ 4 files changed, 60 insertions(+), 33 deletions(-) diff --git a/hls4ml/converters/onnx/convolution.py b/hls4ml/converters/onnx/convolution.py index 9a8f98b7dd..d96b42351f 100644 --- a/hls4ml/converters/onnx/convolution.py +++ b/hls4ml/converters/onnx/convolution.py @@ -25,6 +25,7 @@ def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config): if get_onnx_attribute(node, 'group') != 1: raise ValueError("Only 1 group supported corrently") + layer['in_width'] = input_shapes[0][-2] layer['n_chan'] = input_shapes[0][-1] layer['n_filt'] = input_shapes[1][0] @@ -36,9 +37,9 @@ def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config): #set some values needed later if layer['n_dim'] == 1: # this is 1D convolution - full_width = input_shapes[0][-2] + pads[0] + pads[1] + full_width = layer['in_width'] + pads[0] + pads[1] eff_kernel_width = kernel_shape[0] * dilations[0] - layer['n_out'] = int(np.ceil((full_width - eff_kernel_width + 1) / strides[0])) + layer['out_width'] = int(np.ceil((full_width - eff_kernel_width + 1) / strides[0])) # for compatibility interpret some variables layer['pad_left'] = pads[0] layer['pad_right'] = pads[1] @@ -47,7 +48,8 @@ def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config): layer['dilation_width'] = dilations[0] else: # 2d - full_height = input_shapes[0][-3] + pads[0] + pads[2] + layer['in_height'] = input_shapes[0][-3] + full_height = layer['in_height'] + pads[0] + pads[2] eff_kernel_height = kernel_shape[0] * dilations[0] out_height = int(np.ceil((full_height - eff_kernel_height + 1) / strides[0])) layer['out_height'] = out_height diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 53ef951445..d353ed226d 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -377,7 +377,7 @@ def initialize(self): # use negative indexing because it is not clear if batch dimension is always stripped if self.attributes['n_dim'] == 1: # this is 1D convolution - shape = [self.attributes['n_out'], self.attributes['n_filt']] + shape = [self.attributes['out_width'], self.attributes['n_filt']] dims = ['N_OUTPUTS_{}'.format(self.index), 'N_FILT_{}'.format(self.index)] else: shape = [self.attributes['out_height'], self.attributes['out_width'], self.attributes['n_filt']] @@ -415,8 +415,9 @@ def initialize(self): dims = ['N_FILT_{}'.format(self.index), 'N_OUTPUTS_{}'.format(self.index)] self.add_output_variable(shape, dims) - self.add_weights(quantizer = self.get_attr('weight_quantizer')) - self.add_bias(quantizer = self.get_attr('bias_quantizer')) + if 
self.get_attr("weight") is None: + self.add_weights(quantizer = self.get_attr('weight_quantizer')) + self.add_bias(quantizer = self.get_attr('bias_quantizer')) class SeparableConv1D(Layer): _expected_attributes = [ @@ -497,8 +498,9 @@ def initialize(self): shape = [self.attributes['n_filt'], self.attributes['out_height'], self.attributes['out_width']] dims = ['N_FILT_{}'.format(self.index), 'OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index)] self.add_output_variable(shape, dims) - self.add_weights(quantizer=self.get_attr('weight_quantizer')) - self.add_bias(quantizer=self.get_attr('bias_quantizer')) + if self.get_attr("weight") is None: + self.add_weights(quantizer=self.get_attr('weight_quantizer')) + self.add_bias(quantizer=self.get_attr('bias_quantizer')) class Conv2DBatchnorm(Conv2D): def _get_folded_weights(self): diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index 2229c83e02..a360f71a18 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -1,7 +1,8 @@ import numpy as np from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import FixedPrecisionType -from hls4ml.model.layers import Conv, Constant +from hls4ml.model.types import IntegerPrecisionType +from hls4ml.model.layers import Conv, Constant, Conv1D, Conv2D +from hls4ml.model.optimizer.passes.quant_opt import propagete_type_conv class ConvToConvXD(OptimizerPass): """ Convert Conv with constant to a Conv1D or Conv2D layer """ @@ -33,42 +34,36 @@ def transform(self, model, node): quant_precision = None if weight_precision and input_precision and (bias_precision or not bias_node): - if (weight_precision.width != weight_precision.integer - or input_precision.width != input_precision.integer): - raise ValueError("quant_precisions must always have the same width and integer parameters") - - num_feature_maps = weight_node.value.shape[0] - Nacc = attributes['filt_width'] * attributes.get('filt_height', 1) * num_feature_maps - bitwidth = weight_precision.width + input_precision.width + int(np.ceil(np.log2(Nacc))) - signed = weight_precision.signed or input_precision.signed - # copy staruation and rounding from "other" - rounding_mode = input_precision.rounding_mode - saturation_mode = input_precision.saturation_mode - - # correct if bias - if bias_node: - bitwidth = max(bitwidth + (bias_precision.signed and not signed), - bias_precision.width + (signed and not bias_precision.signed)) + 1 - signed = signed or bias_precision.signed - quant_precision = FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) + quant_precision = propagete_type_conv(input_precision, weight_precision, bias_precision, + num_feature_maps=weight_node.value.shape[0], filt_width=attributes['filt_width'], + filt_height=attributes.get('filt_height', 1)) #creating the attributes # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) if attributes['n_dim'] == 1: - nodetype = "Conv1D" - attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 0)) + nodetype = Conv1D + weight_data = np.transpose(weight_node.value, (1, 2, 0)) else: - nodetype = "Conv2D" - attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 3, 0)) + nodetype = Conv2D + weight_data = np.transpose(weight_node.value, (1, 2, 3, 0)) attributes["weight_precision"] = weight_precision attributes["weight_quantizer"] = weight_node.get_attr("quantizer") 
attributes["quant_precision"] = quant_precision + node.add_weights_variable(name='weight', var_name='w{index}', data=weight_data, + precision=weight_precision, quantizer=attributes['weight_quantizer']) + if bias_node: - attributes["bias_data"] = bias_node.value attributes["bias_precision"] = bias_precision, attributes["bias_quantizer"] = bias_node.get_attr("quantizer") + node.add_weights_variable(name='bias', var_name='b{index}', data=bias_node.value, + precision=bias_precision, quantizer=attributes['bias_quantizer']) + else: + node.add_weights_variable(name='bias', var_name='b{index}', data=np.zeros(node.get_output_variable().shape), + precision=IntegerPrecisionType(1, False)) + + #making new node new_node = model.make_node(nodetype, f"{nodetype}_{node.name}", attributes, diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index 7a5bcc0e0e..8fdc2b148b 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -369,3 +369,31 @@ def propagete_type_mult(in1: FixedPrecisionType, in2: FixedPrecisionType, num_ac return FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) else: return None + +def propagete_type_conv(input_precision: FixedPrecisionType, weight_precision: FixedPrecisionType, bias_precision: FixedPrecisionType, + num_feature_maps: Integral, filt_width: Integral, filt_height: Integral): + ''' + Propagate the precion type across a multiply. Currently only "quant_precision" types (with no fractional bits) + are supported. Rounding modes are propagated from in1 + ''' + if input_precision and weight_precision: + if (weight_precision.width != weight_precision.integer + or input_precision.width != input_precision.integer): + raise ValueError("quant_precisions must always have the same width and integer parameters") + + Nacc = filt_width * filt_height * num_feature_maps + bitwidth = weight_precision.width + input_precision.width + int(np.ceil(np.log2(Nacc))) + signed = weight_precision.signed or input_precision.signed + # copy staruation and rounding from input + rounding_mode = input_precision.rounding_mode + saturation_mode = input_precision.saturation_mode + + # correct if bias + if bias_precision: + bitwidth = max(bitwidth + (bias_precision.signed and not signed), + bias_precision.width + (signed and not bias_precision.signed)) + 1 + signed = signed or bias_precision.signed + return FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) + + else: + return None From 3956a93fe4cb4841e6d52ee062e47f7c8bb48aa0 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 2 May 2022 20:04:10 -0500 Subject: [PATCH 17/51] another snapshot, towards fixing cnv --- hls4ml/model/layers.py | 73 +++++++++--- hls4ml/model/optimizer/__init__.py | 1 + .../model/optimizer/passes/batchnorm_opt.py | 24 ++-- .../model/optimizer/passes/conv_to_convxd.py | 66 +++++++---- .../optimizer/passes/matmul_const_to_dense.py | 51 ++++++--- hls4ml/model/optimizer/passes/merge_const.py | 23 ++-- hls4ml/model/optimizer/passes/quant_opt.py | 108 ++++++------------ 7 files changed, 198 insertions(+), 148 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index d353ed226d..2b3b41e69b 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -248,6 +248,7 @@ def get_layer_precision(self): precision[data_type.name] = data_type return precision + #TODO Should move this to a backend pass since it assumes C++ HLS output def get_numbers_cpp(self): numbers 
= '' for k, v in self.get_output_variable().get_shape(): @@ -255,6 +256,7 @@ def get_numbers_cpp(self): return numbers + #TODO same with this (though it doesn't seem to be used, so can maybe just be deleted) def precision_cpp(self): return 'typedef {precision} layer{index}_t;'.format(precision=self.get_output_variable().precision, index=self.index) @@ -362,6 +364,20 @@ def initialize(self): else: dims = ['N_LAYER_{}'.format(self.index)] self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision")) + + if self.get_attr("weight_data") is not None: + weight_data = self.get_attr("weight_data") + weight_precision = self.get_attr("weight_precision") + weight_quantizer = self.get_attr("weight_quantizer") + self.add_weights_variable(name='weight', var_name='w{index}', data=weight_data, + precision=weight_precision, quantizer=weight_quantizer) + + bias_data = self.get_attr("bias_data") + bias_precision = self.get_attr("bias_precision") + bias_quantizer = self.get_attr("bias_quantizer") + self.add_weights_variable(name='bias', var_name='b{index}', data=bias_data, + precision=bias_precision, quantizer=bias_quantizer) + if self.get_attr("weight") is None: self.add_weights(quantizer=self.get_attr('weight_quantizer'), compression=self.model.config.get_compression(self)) self.add_bias(quantizer=self.get_attr('bias_quantizer')) @@ -415,6 +431,20 @@ def initialize(self): dims = ['N_FILT_{}'.format(self.index), 'N_OUTPUTS_{}'.format(self.index)] self.add_output_variable(shape, dims) + + if self.get_attr("weight_data") is not None: + weight_data = self.get_attr("weight_data") + weight_precision = self.get_attr("weight_precision") + weight_quantizer = self.get_attr("weight_quantizer") + self.add_weights_variable(name='weight', var_name='w{index}', data=weight_data, + precision=weight_precision, quantizer=weight_quantizer) + + bias_data = self.get_attr("bias_data") + bias_precision = self.get_attr("bias_precision") + bias_quantizer = self.get_attr("bias_quantizer") + self.add_weights_variable(name='bias', var_name='b{index}', data=bias_data, + precision=bias_precision, quantizer=bias_quantizer) + if self.get_attr("weight") is None: self.add_weights(quantizer = self.get_attr('weight_quantizer')) self.add_bias(quantizer = self.get_attr('bias_quantizer')) @@ -498,6 +528,20 @@ def initialize(self): shape = [self.attributes['n_filt'], self.attributes['out_height'], self.attributes['out_width']] dims = ['N_FILT_{}'.format(self.index), 'OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index)] self.add_output_variable(shape, dims) + + if self.get_attr("weight_data") is not None: + weight_data = self.get_attr("weight_data") + weight_precision = self.get_attr("weight_precision") + weight_quantizer = self.get_attr("weight_quantizer") + self.add_weights_variable(name='weight', var_name='w{index}', data=weight_data, + precision=weight_precision, quantizer=weight_quantizer) + + bias_data = self.get_attr("bias_data") + bias_precision = self.get_attr("bias_precision") + bias_quantizer = self.get_attr("bias_quantizer") + self.add_weights_variable(name='bias', var_name='b{index}', data=bias_data, + precision=bias_precision, quantizer=bias_quantizer) + if self.get_attr("weight") is None: self.add_weights(quantizer=self.get_attr('weight_quantizer')) self.add_bias(quantizer=self.get_attr('bias_quantizer')) @@ -819,6 +863,15 @@ def initialize(self): dims = inp.dim_names self.add_output_variable(shape, dims) + if self.get_attr("scale_data") is not None: + scale = self.get_attr('scale_data') + 
scale_quantizer = self.get_attr('scale_quantizer') + bias = self.get_attr('bias_data') + bias_quantizer = self.get_attr('bias_quantizer') + + self.add_weights(scale, quantizer=scale_quantizer) + self.add_bias(bias, quantizer=bias_quantizer) + if self.get_attr('scale') is None: gamma = self.model.get_weights_data(self.name, 'gamma') beta = self.model.get_weights_data(self.name, 'beta') @@ -830,15 +883,12 @@ def initialize(self): self.add_weights_variable(name='scale', var_name='s{index}', data=scale) self.add_weights_variable(name='bias', var_name='b{index}', data=bias) - elif isinstance(self.get_attr('scale'), np.ndarray): - self.add_weights_variable('scale', var_name='s{index}', - data=self.get_attr('scale'), - precision=self.get_attr("scale_precision"), - quantizer=self.get_attr("bias_quantizer")) - self.add_weights_variable('bias', var_name='b{index}', - data=self.get_attr('bias'), - precision=self.get_attr("bias_precision"), - quantizer=self.get_attr("bias_quantizer")) + + def add_weights(self, scale, quantizer=None): + self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer) + + def add_bias(self, bias, quantizer=None): + self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer) class ApplyAlpha(BatchNormalization): @@ -859,11 +909,6 @@ def initialize(self): self.add_weights(scale, quantizer=scale_quantizer) self.add_bias(bias, quantizer=bias_quantizer) - def add_weights(self, scale, quantizer=None): - self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer) - - def add_bias(self, bias, quantizer=None): - self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer) class Merge(Layer): def initialize(self): diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 9f0fece48c..ee32f2958c 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -20,6 +20,7 @@ 'quant_constant_parameters', 'quant_to_activation', 'fuse_quant_with_constant', + 'quant_to_alpha_activation_alpha', 'const_quant_to_const_alpha', 'batch_norm_onnx_constant_parameters', 'constant_batch_norm_fusion', diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 11a84f56ed..e68b2c0ee0 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -2,6 +2,8 @@ from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant +_base_attributes = ('Trace', 'reuse_factor', 'n_in', 'n_filt') + class BatchNormOnnxConstantParameters(OptimizerPass): """ Remove Constant from the BatchNormalization node parameters (but not input[0]) """ def match(self, node): @@ -18,11 +20,13 @@ def transform(self, model, node): if not (len(node.inputs) == 5 and all(node.inputs)): raise ValueError(f"All {len.node.inputs} BatchNormOnnnx inputs need to be defined") + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + gamma_node = node.get_input_node(node.inputs[1]) if not isinstance(gamma_node, Constant): raise TypeError("Only consant gammas supported") gamma = gamma_node.value - node.set_attr('gamma', gamma) + attributes['gamma'] = gamma node.inputs[1] = '' model.remove_node(gamma_node, rewire=False) @@ -30,7 +34,7 @@ def transform(self, model, node): if not isinstance(beta_node, Constant): raise TypeError("Only consant betas supported") beta = 
beta_node.value - node.set_attr('beta', beta) + attributes['beta'] = beta node.inputs[2] = '' model.remove_node(beta_node, rewire=False) @@ -38,7 +42,7 @@ def transform(self, model, node): if not isinstance(moving_mean_node, Constant): raise TypeError("Only consant moving_means supported") moving_mean = moving_mean_node.value - node.set_attr('moving_mean', moving_mean) + attributes['moving_mean'] = moving_mean node.inputs[3] = '' model.remove_node(moving_mean_node, rewire=False) @@ -46,18 +50,16 @@ def transform(self, model, node): if not isinstance(moving_variance_node, Constant): raise TypeError("Only consant moving_variances supported") moving_variance = moving_variance_node.value - node.set_attr('moving_variance', moving_variance) + attributes['moving_variance'] = moving_variance node.inputs[4] = '' model.remove_node(moving_variance_node, rewire=False) scale = gamma / np.sqrt(moving_variance + node.get_attr('epsilon')) bias = beta - gamma * moving_mean / np.sqrt(moving_variance + node.get_attr('epsilon')) - node.set_attr("scale", scale) - node.set_attr("bias", bias) - #node.add_weights_variable("scale", data=scale, precision=node.get_attr("scale_precision"), quantizer=node.get_attr("bias_quantizer")) - #node.add_weights_variable("bias", data=bias, precision=node.get_attr("bias_precision"), quantizer=node.get_attr("bias_quantizer")) + attributes["scale_data"] = scale + attributes["bias_data"] = bias - new_node = model.make_node(BatchNormalization, node.name, node.attributes, + new_node = model.make_node(BatchNormalization, node.name, attributes, [node.inputs[0]], [x for x in node.outputs]) model.replace_node(node, new_node) @@ -120,8 +122,8 @@ def transform(self, model, node): bias_new = s1 * b0 + b1 # call function so that quantizer would be called if needed - node.add_weights_variable(name='scale', data=scale_new, precision=node.get_attr("scale_precision"), quantizer=node.get_attr("scale_quantizer")) - node.add_weights_variable(name='bias', data=bias_new, precision=node.get_attr("bias_precision"), quantizer=node.get_attr("bias_quantizer")) + node.add_weights(scale_new, quantizer=node.get_attr("scale_quantizer")) + node.add_bias(bias_new, quantizer=node.get_attr("bias_quantizer")) model.remove_node(prev_node, rewire=True) return True diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index a360f71a18..86ded75a43 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -1,8 +1,13 @@ import numpy as np +import math # prefer to use math.ceil for scalar values (returns int) from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import IntegerPrecisionType +from hls4ml.model.types import IntegerPrecisionType, FixedPrecisionType from hls4ml.model.layers import Conv, Constant, Conv1D, Conv2D -from hls4ml.model.optimizer.passes.quant_opt import propagete_type_conv +from numbers import Integral + +_base_attributes = ('Trace', 'reuse_factor', 'in_width', 'out_width', 'n_chan', 'n_filt', 'pad_left', 'pad_right', + 'filt_width', 'stride_width', 'dilation_width', 'in_height', 'out_height', 'pad_top', 'pad_bottom', + 'filt_height', 'stride_height', 'dilation_height') class ConvToConvXD(OptimizerPass): """ Convert Conv with constant to a Conv1D or Conv2D layer """ @@ -28,45 +33,38 @@ def transform(self, model, node): bias_node = node.get_input_node(node.inputs[2]) bias_precision = bias_node.get_attr("quant_precision") - # copy the attributes to the new node. 
(No need to explictily copy since the old node is deleted) - attributes = node.attributes + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} quant_precision = None if weight_precision and input_precision and (bias_precision or not bias_node): - quant_precision = propagete_type_conv(input_precision, weight_precision, bias_precision, + quant_precision = propagate_type_conv(input_precision, weight_precision, bias_precision, num_feature_maps=weight_node.value.shape[0], filt_width=attributes['filt_width'], filt_height=attributes.get('filt_height', 1)) #creating the attributes # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) - if attributes['n_dim'] == 1: - nodetype = Conv1D - weight_data = np.transpose(weight_node.value, (1, 2, 0)) + if node.attributes['n_dim'] == 1: + newtype = Conv1D + attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 0)) else: - nodetype = Conv2D - weight_data = np.transpose(weight_node.value, (1, 2, 3, 0)) + newtype = Conv2D + attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 3, 0)) attributes["weight_precision"] = weight_precision attributes["weight_quantizer"] = weight_node.get_attr("quantizer") attributes["quant_precision"] = quant_precision - - node.add_weights_variable(name='weight', var_name='w{index}', data=weight_data, - precision=weight_precision, quantizer=attributes['weight_quantizer']) if bias_node: + attributes["bias_data"] = bias_node.value, attributes["bias_precision"] = bias_precision, attributes["bias_quantizer"] = bias_node.get_attr("quantizer") - node.add_weights_variable(name='bias', var_name='b{index}', data=bias_node.value, - precision=bias_precision, quantizer=attributes['bias_quantizer']) else: - node.add_weights_variable(name='bias', var_name='b{index}', data=np.zeros(node.get_output_variable().shape), - precision=IntegerPrecisionType(1, False)) - - + attributes["bias_data"] = np.zeros(node.get_output_variable().shape) + attributes["bias_precision"] = IntegerPrecisionType(1, False) #making new node - new_node = model.make_node(nodetype, f"{nodetype}_{node.name}", attributes, + new_node = model.make_node(newtype, f"{newtype.__name__}_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) #removing and replacing old nodes @@ -76,3 +74,31 @@ def transform(self, model, node): model.replace_node(node, new_node) return True + +def propagate_type_conv(input_precision: FixedPrecisionType, weight_precision: FixedPrecisionType, bias_precision: FixedPrecisionType, + num_feature_maps: Integral, filt_width: Integral, filt_height: Integral): + ''' + Propagate the precion type across a multiply. Currently only "quant_precision" types (with no fractional bits) + are supported. 
Rounding modes are propagated from in1 + ''' + if input_precision and weight_precision: + if (weight_precision.width != weight_precision.integer + or input_precision.width != input_precision.integer): + raise ValueError("quant_precisions must always have the same width and integer parameters") + + Nacc = filt_width * filt_height * num_feature_maps + bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(Nacc)) + signed = weight_precision.signed or input_precision.signed + # copy staruation and rounding from input + rounding_mode = input_precision.rounding_mode + saturation_mode = input_precision.saturation_mode + + # correct if bias + if bias_precision: + bitwidth = max(bitwidth + (bias_precision.signed and not signed), + bias_precision.width + (signed and not bias_precision.signed)) + 1 + signed = signed or bias_precision.signed + return FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) + + else: + return None diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 0baa419b20..dc12b19e1b 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -1,8 +1,11 @@ import numpy as np +import math # prefer to use math.ceil for scalar values (returns int) from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import IntegerPrecisionType, NamedType +from hls4ml.model.types import IntegerPrecisionType, NamedType, FixedPrecisionType from hls4ml.model.layers import MatMul, Constant, Dense -from hls4ml.model.optimizer.passes.quant_opt import propagete_type_mult +from numbers import Integral + +_base_attributes = ('Trace', 'reuse_factor', 'weight', 'weight_t', 'bias', 'bias_t') class MatmulConstToDense(OptimizerPass): """ @@ -28,30 +31,27 @@ def transform(self, model, node): other_precision = other_node.get_attr("quant_precision") in_shape = other_var.shape - node.set_attr('n_in', np.prod(in_shape)) + n_in = np.prod(in_shape) out_shape = list(in_shape[:-1]) + [const_node.value.shape[-1]] - node.set_attr('n_out', np.prod(out_shape)) - - node.set_attr('trace', True) - - quant_precision = propagete_type_mult(other_precision, weight_precision, in_shape[-1]) + n_out = np.prod(out_shape) - node.add_weights_variable(name='weight', var_name='w{index}', data=const_node.value, - precision=weight_precision, quantizer=weight_quantizer) - # add a dummy bias - # (A real one can be added after with bn_fuse) - node.add_weights_variable(name='bias', var_name='b{index}', data=np.zeros(out_shape), - precision=IntegerPrecisionType(1, False)) + quant_precision = propagate_type_mult(other_precision, weight_precision, in_shape[-1]) #creating the attributes - node.attributes.update({ + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes.update({ + "weight_data": const_node.value, "weight_precision": weight_precision, "weight_quantizer": weight_quantizer, + "bias_data": np.zeros(out_shape), + "bias_precision": IntegerPrecisionType(1, False), "quant_precision": quant_precision, + "n_in": n_in, + "n_out": n_out }) #making new node - new_dense = model.make_node(Dense, f"Dense_{node.name}", node.attributes, + new_dense = model.make_node(Dense, f"Dense_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) if quant_precision: @@ -63,3 +63,22 @@ def transform(self, model, node): model.replace_node(node, new_dense) return True + +def propagate_type_mult(in1: 
FixedPrecisionType, in2: FixedPrecisionType, num_acc: Integral): + ''' + Propagate the precion type across a multiply. Currently only "quant_precision" types (with no fractional bits) + are supported. Rounding modes are propagated from in1 + ''' + if in2 and in1: + if (in2.width != in2.integer + or in1.width != in1.integer): + raise ValueError("quant_precisions must always have the same width and integer parameters") + + bitwidth = in2.width + in1.width + math.ceil(np.log2(num_acc)) + signed = in2.signed or in1.signed + # copy staruation and rounding from "in1" + rounding_mode = in1.rounding_mode + saturation_mode = in1.saturation_mode + return FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) + else: + return None diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 177308bfae..f8a3d04e7a 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -2,6 +2,9 @@ from hls4ml.model.layers import Merge, Constant, BatchNormalization from hls4ml.model.optimizer import OptimizerPass +_base_attributes = ('Trace', 'reuse_factor', 'n_in') + +#TODO This doesn't yet support quantization in the constants class MergeTwoConstants(OptimizerPass): """ Merge of two constants makes another constant """ @@ -101,11 +104,10 @@ def transform(self, model, node): scale = const_node.value bias = np.array(0) - attributes = node.attributes + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update({ - "simple": True, - "scale": scale, - "bias": bias, + "scale_data": scale, + "bias_data": bias, "n_in": n_in, "n_out": n_in, "n_filt": -1 @@ -136,16 +138,15 @@ def transform(self, model, node): scale = 1/const_node.value bias = np.array(0) - attributes = { - "simple": True, - "scale": scale, - "bias": bias, - "quant_precision": node.get_attr("quant_precision"), - "quantizer": node.get_attr("quantizer"), + + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes.update({ + "scale_data": scale, + "bias_data": bias, "n_in": n_in, "n_out": n_in, "n_filt": -1 - } + }) bn_layer = model.make_node("BatchNormalization", f"bn_{node.name}", attributes, diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index 8fdc2b148b..d09cf5ffe7 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -13,11 +13,13 @@ ''' from copy import deepcopy import numpy as np +import math # prefer to use math.ceil for scalar values from hls4ml.model.types import FixedPrecisionType -from hls4ml.model.layers import Quant, Constant +from hls4ml.model.layers import Quant, Constant, Activation, ApplyAlpha from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer from hls4ml.model.optimizer import OptimizerPass -from numbers import Integral + +_base_attributes = ('Trace', 'reuse_factor') class QuantConstantParameters(OptimizerPass): """ Remove Constant from the Qaunt node parameters (but not input[0]) """ @@ -53,6 +55,8 @@ def transform(self, model, node): if node.get_input_node(node.inputs[3]): bitwidth_node = node.get_input_node(node.inputs[3]) if isinstance(bitwidth_node, Constant): + if np.squeeze(bitwidth_node.value).shape: + raise RuntimeError("Only scalar bitwidth values are supporeted by the Quant node") node.set_attr('bitwidth', bitwidth_node.value) node.inputs[3] = '' model.remove_node(bitwidth_node, rewire=False) @@ -99,7 +103,7 @@ def 
transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode) - attributes = node.attributes + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update({ 'activation' : 'linear', 'quant_precision' : precision, @@ -107,7 +111,7 @@ def transform(self, model, node): 'n_in' : n_in }) - new_node = model.make_node('Activation', f'{node.name}_act', + new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) new_node.get_output_variable().type.precision = precision model.replace_node(node, new_node) @@ -201,7 +205,7 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode) - attributes = deepcopy(node.attributes) + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update({ 'activation' : 'linear', 'quant_precision' : precision, @@ -209,7 +213,7 @@ def transform(self, model, node): 'n_in' : n_in }) - new_node = model.make_node('Activation', f'{node.name}_act', + new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) new_node.get_output_variable().type.precision = precision model.replace_node(node, new_node) @@ -219,31 +223,34 @@ def transform(self, model, node): scale = node.get_attr("scale") bias = node.get_attr("zeropt") - attributes_scale = node.attributes + attributes_scale = {k: node.attributes.get(k, None) for k in _base_attributes} attributes_scale.update({ 'n_in': n_in, 'n_out': n_in, 'n_filt': -1 }) - attributes_rescale = deepcopy(attributes_scale) + attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes_rescale.update({ + 'n_in': n_in, + 'n_out': n_in, + 'n_filt': -1 + }) - scale_node = model.make_node('ApplyAlpha', node.name + '_scale', attributes_scale, [x for x in node.inputs]) firstscale = 1/scale firstbias = bias - scale_node.set_attr("scale", firstscale) - scale_node.set_attr("bias", firstbias) - scale_node.add_weights(firstscale) - scale_node.add_bias(firstbias) + attributes_scale["scale_data"] = firstscale + attributes_scale["bias_data"] = firstbias + + scale_node = model.make_node(ApplyAlpha, node.name + '_scale', attributes_scale, [node.inputs[0]]) model.insert_node(scale_node) - rescale_node = model.make_node('ApplyAlpha', node.name + '_rescale', attributes_rescale, [x for x in new_node.outputs]) rescale = scale rebias = -bias*scale - rescale_node.set_attr("scale", rescale) - rescale_node.set_attr("bias", rebias) - rescale_node.add_weights(rescale) - rescale_node.add_bias(rebias) + attributes_rescale["scale_data"] = rescale + attributes_rescale["bias_data"] = rebias + + rescale_node = model.make_node(ApplyAlpha, node.name + '_rescale', attributes_rescale, [new_node.outputs[0]]) model.insert_node(rescale_node) return True @@ -303,21 +310,20 @@ def transform(self, model, node): # reinitialize (which also runs quantization if quantizer exists) const_node.initialize() - attributes_rescale = node.attributes + attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} attributes_rescale.update({ 'n_in': n_in, 'n_out': n_in, 'n_filt': -1 }) - rescale_node = model.make_node('ApplyAlpha', node.name + '_rescale', attributes_rescale, - [x for x in node.inputs], [x for x in node.outputs]) rescale = scale rebias = -bias*scale - rescale_node.set_attr("scale", rescale) - rescale_node.set_attr("bias", rebias) - 
rescale_node.add_weights(rescale) - rescale_node.add_bias(rebias) + attributes_rescale["scale_data"] = rescale + attributes_rescale["bias_data"] = rebias + + rescale_node = model.make_node(ApplyAlpha, node.name + '_rescale', attributes_rescale, + [x for x in node.inputs], [x for x in node.outputs]) model.replace_node(node, rescale_node) return True @@ -342,58 +348,8 @@ def _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode): else: bn_sat = "AP_SAT" - if np.squeeze(bitwidth).shape: - raise RuntimeError("Only scalar bitwidth values are supporeted by the Quant node") - bitwidth = int(bitwidth) + bitwidth = math.ceil(bitwidth) precision = FixedPrecisionType(bitwidth, bitwidth, signed, bn_round, bn_sat) quantizer = QuantNodeQuantizer(precision) return (precision, quantizer) - - -def propagete_type_mult(in1: FixedPrecisionType, in2: FixedPrecisionType, num_acc: Integral): - ''' - Propagate the precion type across a multiply. Currently only "quant_precision" types (with no fractional bits) - are supported. Rounding modes are propagated from in1 - ''' - if in2 and in1: - if (in2.width != in2.integer - or in1.width != in1.integer): - raise ValueError("quant_precisions must always have the same width and integer parameters") - - bitwidth = in2.width + in1.width + int(np.ceil(np.log2(num_acc))) - signed = in2.signed or in1.signed - # copy staruation and rounding from "in1" - rounding_mode = in1.rounding_mode - saturation_mode = in1.saturation_mode - return FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) - else: - return None - -def propagete_type_conv(input_precision: FixedPrecisionType, weight_precision: FixedPrecisionType, bias_precision: FixedPrecisionType, - num_feature_maps: Integral, filt_width: Integral, filt_height: Integral): - ''' - Propagate the precion type across a multiply. Currently only "quant_precision" types (with no fractional bits) - are supported. 
Rounding modes are propagated from in1 - ''' - if input_precision and weight_precision: - if (weight_precision.width != weight_precision.integer - or input_precision.width != input_precision.integer): - raise ValueError("quant_precisions must always have the same width and integer parameters") - - Nacc = filt_width * filt_height * num_feature_maps - bitwidth = weight_precision.width + input_precision.width + int(np.ceil(np.log2(Nacc))) - signed = weight_precision.signed or input_precision.signed - # copy staruation and rounding from input - rounding_mode = input_precision.rounding_mode - saturation_mode = input_precision.saturation_mode - - # correct if bias - if bias_precision: - bitwidth = max(bitwidth + (bias_precision.signed and not signed), - bias_precision.width + (signed and not bias_precision.signed)) + 1 - signed = signed or bias_precision.signed - return FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) - - else: - return None From a220ad56580c7471874b7090df65e801d02e43cc Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 3 May 2022 15:50:22 -0500 Subject: [PATCH 18/51] add strategy to values copied --- hls4ml/model/optimizer/passes/conv_to_convxd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index 86ded75a43..72f837b387 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -7,7 +7,7 @@ _base_attributes = ('Trace', 'reuse_factor', 'in_width', 'out_width', 'n_chan', 'n_filt', 'pad_left', 'pad_right', 'filt_width', 'stride_width', 'dilation_width', 'in_height', 'out_height', 'pad_top', 'pad_bottom', - 'filt_height', 'stride_height', 'dilation_height') + 'filt_height', 'stride_height', 'dilation_height', 'strategy') class ConvToConvXD(OptimizerPass): """ Convert Conv with constant to a Conv1D or Conv2D layer """ From 71300fb02324e01422d0c1ed48835612afb0683a Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 3 May 2022 16:49:57 -0500 Subject: [PATCH 19/51] Fix CNV parsing --- hls4ml/model/layers.py | 10 +++++----- hls4ml/model/optimizer/passes/conv_to_convxd.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 2b3b41e69b..c700c581e9 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -430,7 +430,7 @@ def initialize(self): shape = [self.attributes['n_filt'], self.attributes['out_width']] dims = ['N_FILT_{}'.format(self.index), 'N_OUTPUTS_{}'.format(self.index)] - self.add_output_variable(shape, dims) + self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision")) if self.get_attr("weight_data") is not None: weight_data = self.get_attr("weight_data") @@ -479,7 +479,7 @@ def initialize(self): else: shape = [self.attributes['n_filt'], self.attributes['out_width']] dims = ['N_FILT_{}'.format(self.index), 'N_OUTPUTS_{}'.format(self.index)] - self.add_output_variable(shape, dims) + self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision")) depthwise_data = self.model.get_weights_data(self.name, 'depthwise_kernel') pointwise_data = self.model.get_weights_data(self.name, 'pointwise_kernel') @@ -527,7 +527,7 @@ def initialize(self): else: shape = [self.attributes['n_filt'], self.attributes['out_height'], self.attributes['out_width']] dims = ['N_FILT_{}'.format(self.index), 'OUT_HEIGHT_{}'.format(self.index), 
'OUT_WIDTH_{}'.format(self.index)]
-        self.add_output_variable(shape, dims)
+        self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision"))
 
         if self.get_attr("weight_data") is not None:
@@ -628,7 +628,7 @@ def initialize(self):
         else:
             shape = [self.attributes['n_filt'], self.attributes['out_height'], self.attributes['out_width']]
             dims = ['N_FILT_{}'.format(self.index), 'OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index)]
-        self.add_output_variable(shape, dims)
+        self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision"))
 
         depthwise_data = self.model.get_weights_data(self.name, 'depthwise_kernel')
         pointwise_data = self.model.get_weights_data(self.name, 'pointwise_kernel')
@@ -649,7 +649,7 @@ def initialize(self):
         else:
             shape = [self.attributes['n_chan'], self.attributes['out_height'], self.attributes['out_width']]
             dims = ['N_CHAN_{}'.format(self.index), 'OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index)]
-        self.add_output_variable(shape, dims)
+        self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision"))
 
         depthwise_data = self.model.get_weights_data(self.name, 'depthwise_kernel')
         self.add_weights_variable(name='weight', var_name='w{index}', data=depthwise_data, quantizer=self.get_attr('depthwise_quantizer'))
diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py
index 72f837b387..9caff14d5d 100644
--- a/hls4ml/model/optimizer/passes/conv_to_convxd.py
+++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py
@@ -7,7 +7,7 @@
 _base_attributes = ('Trace', 'reuse_factor', 'in_width', 'out_width', 'n_chan', 'n_filt', 'pad_left', 'pad_right',
                     'filt_width', 'stride_width', 'dilation_width', 'in_height', 'out_height', 'pad_top', 'pad_bottom',
-                    'filt_height', 'stride_height', 'dilation_height', 'strategy')
+                    'filt_height', 'stride_height', 'dilation_height', 'strategy', 'data_format')
 
 class ConvToConvXD(OptimizerPass):
     """ Convert Conv with constant to a Conv1D or Conv2D layer """
@@ -60,7 +60,7 @@ def transform(self, model, node):
             attributes["bias_precision"] = bias_precision,
             attributes["bias_quantizer"] = bias_node.get_attr("quantizer")
         else:
-            attributes["bias_data"] = np.zeros(node.get_output_variable().shape)
+            attributes["bias_data"] = np.zeros(attributes['n_filt'])
             attributes["bias_precision"] = IntegerPrecisionType(1, False)
 
         #making new node

From b3d7f4935e51a37c0ed45bd50be23cebeb1c04c1 Mon Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Thu, 5 May 2022 19:31:02 -0500
Subject: [PATCH 20/51] Ingest qonnx jettagging (#538)

* allow propagating types when width != integer
* add special cases for power of 2
* move inferring of precision to separate optimizer
* update batchnorm and batchnorm/Dense/Conv fusion
* propagate const information in merge with const (e.g.
add node for bias) * restrict propagation of types for accumulator only when set by quant nodes --- hls4ml/converters/onnx/quantizer.py | 11 +-- hls4ml/model/layers.py | 12 ++-- hls4ml/model/optimizer/__init__.py | 2 + .../model/optimizer/passes/batchnorm_opt.py | 44 ++++++++++-- hls4ml/model/optimizer/passes/bn_fuse.py | 72 ++++++++++++++----- .../model/optimizer/passes/conv_to_convxd.py | 47 +----------- .../optimizer/passes/matmul_const_to_dense.py | 30 -------- hls4ml/model/optimizer/passes/merge_const.py | 29 +++++++- .../passes/propagate_conv_precision.py | 65 +++++++++++++++++ .../passes/propagate_dense_precision.py | 61 ++++++++++++++++ hls4ml/model/optimizer/passes/quant_opt.py | 60 +++++++++++++--- test/pytest/test_qonnx.py | 38 ++++++++++ 12 files changed, 354 insertions(+), 117 deletions(-) create mode 100644 hls4ml/model/optimizer/passes/propagate_conv_precision.py create mode 100644 hls4ml/model/optimizer/passes/propagate_dense_precision.py diff --git a/hls4ml/converters/onnx/quantizer.py b/hls4ml/converters/onnx/quantizer.py index e2ca9d9814..694eb42967 100644 --- a/hls4ml/converters/onnx/quantizer.py +++ b/hls4ml/converters/onnx/quantizer.py @@ -1,6 +1,6 @@ """ -Quantizer for the Quant node, after scale and zeropoint hafe been extracted. -(Thus at this point they are 1 and 0.) +Quantizer for the Quant node, after scale and zeropoint hafe been extracted +(unless scale is a power of 2, if doing special case po2) This is based on the sample implementation in finn-base """ @@ -11,11 +11,14 @@ class QuantNodeQuantizer(Quantizer): """ This implements a quantizer for a FixedPrecisionType with width==integer""" def __init__(self, precision): - assert(precision.width == precision.integer) super().__init__(precision.width, precision) def __call__(self, data): """ Apply the quantization on the data """ + + scale = 2**(self.hls_type.width - self.hls_type.integer) + + data = data * scale # (not using *= to avoid modifying data) # Clamping min_int_val = self._min_int(self.hls_type.signed, self.hls_type.saturation_mode, self.bits) max_int_val = self._max_int(self.hls_type.signed, self.bits) @@ -23,7 +26,7 @@ def __call__(self, data): data = np.where(data < min_int_val, min_int_val, data) # Rounding rounding_fx = self._resolve_rounding_mode(self.hls_type.rounding_mode) - return rounding_fx(data) + return rounding_fx(data) / scale @staticmethod diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index c700c581e9..60e91b6fb4 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -363,7 +363,7 @@ def initialize(self): dims = ['N_LAYER_{}_{}'.format(i, self.index) for i in range(1, len(shape) + 1)] else: dims = ['N_LAYER_{}'.format(self.index)] - self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision")) + self.add_output_variable(shape, dims) if self.get_attr("weight_data") is not None: weight_data = self.get_attr("weight_data") @@ -430,7 +430,7 @@ def initialize(self): shape = [self.attributes['n_filt'], self.attributes['out_width']] dims = ['N_FILT_{}'.format(self.index), 'N_OUTPUTS_{}'.format(self.index)] - self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision")) + self.add_output_variable(shape, dims) if self.get_attr("weight_data") is not None: weight_data = self.get_attr("weight_data") @@ -479,7 +479,7 @@ def initialize(self): else: shape = [self.attributes['n_filt'], self.attributes['out_width']] dims = ['N_FILT_{}'.format(self.index), 'N_OUTPUTS_{}'.format(self.index)] - self.add_output_variable(shape, dims, 
precision=self.get_attr("quant_precision")) + self.add_output_variable(shape, dims) depthwise_data = self.model.get_weights_data(self.name, 'depthwise_kernel') pointwise_data = self.model.get_weights_data(self.name, 'pointwise_kernel') @@ -527,7 +527,7 @@ def initialize(self): else: shape = [self.attributes['n_filt'], self.attributes['out_height'], self.attributes['out_width']] dims = ['N_FILT_{}'.format(self.index), 'OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index)] - self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision")) + self.add_output_variable(shape, dims) if self.get_attr("weight_data") is not None: weight_data = self.get_attr("weight_data") @@ -628,7 +628,7 @@ def initialize(self): else: shape = [self.attributes['n_filt'], self.attributes['out_height'], self.attributes['out_width']] dims = ['N_FILT_{}'.format(self.index), 'OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index)] - self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision")) + self.add_output_variable(shape, dims) depthwise_data = self.model.get_weights_data(self.name, 'depthwise_kernel') pointwise_data = self.model.get_weights_data(self.name, 'pointwise_kernel') @@ -649,7 +649,7 @@ def initialize(self): else: shape = [self.attributes['n_chan'], self.attributes['out_height'], self.attributes['out_width']] dims = ['N_CHAN_{}'.format(self.index), 'OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index)] - self.add_output_variable(shape, dims, precision=self.get_attr("quant_precision")) + self.add_output_variable(shape, dims) depthwise_data = self.model.get_weights_data(self.name, 'depthwise_kernel') self.add_weights_variable(name='weight', var_name='w{index}', data=depthwise_data, quantizer=self.get_attr('depthwise_quantizer')) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index ee32f2958c..28d157253d 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -41,6 +41,8 @@ 'replace_multidimensional_dense_with_conv', 'eliminate_linear_activation_quant', 'eliminate_linear_activation', + 'propagate_dense_precision', + 'propagate_conv_precision', ] try: diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index e68b2c0ee0..af9812449b 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -1,3 +1,4 @@ +from audioop import bias import numpy as np from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant @@ -105,9 +106,30 @@ class FuseConsecutiveBatchNormalization(OptimizerPass): ''' def match(self, node): - return (isinstance(node, BatchNormalization) - and isinstance(node.get_input_node(node.inputs[0]), BatchNormalization) - and not node.get_input_node(node.inputs[0]).get_attr("quant_precision")) + prev_node = node.get_input_node(node.inputs[0]) + basic_match = (isinstance(node, BatchNormalization) + and isinstance(prev_node, BatchNormalization) + and not prev_node.get_attr("quant_precision")) + + # check for compatibility to merge + if basic_match: + s0 = prev_node.weights['scale'].data_unquantized + b0 = prev_node.weights['bias'].data_unquantized + s1 = node.weights['scale'].data_unquantized + b1 = node.weights['bias'].data_unquantized + scale_compatible = ( + (prev_node.get_attr("scale_quantizer") is None + and node.get_attr("scale_quantizer") is None) + or (s0 == 
np.ones_like(s0)).all() + or (s1 == np.ones_like(s1)).all()) + bias_compatible = ( + (prev_node.get_attr("bias_quantizer") is None + and node.get_attr("bias_quantizer") is None) + or (b0 == np.zeros_like(b0)).all() + or (b1 == np.zeros_like(b1)).all()) + return scale_compatible and bias_compatible + else: + return False def transform(self, model, node): @@ -118,12 +140,24 @@ def transform(self, model, node): s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized + s_quantizer = (node.get_attr("scale_quantizer") if (s0 == np.ones_like(s0)).all() + else prev_node.get_attr("scale_quantizer")) + b_quantizer = (node.get_attr("bias_quantizer") if (b0 == np.zeros_like(b0)).all() + else prev_node.get_attr("bias_quantizer")) + + node.set_attr("scale_quantizer", s_quantizer) + node.set_attr("bias_quantizer", b_quantizer) + if s_quantizer: + node.set_attr("scale_precision", s_quantizer.hls_type) + if b_quantizer: + node.set_attr("bias_precision", b_quantizer.hls_type) + scale_new = s0 * s1 bias_new = s1 * b0 + b1 # call function so that quantizer would be called if needed - node.add_weights(scale_new, quantizer=node.get_attr("scale_quantizer")) - node.add_bias(bias_new, quantizer=node.get_attr("bias_quantizer")) + node.add_weights(scale_new, quantizer=s_quantizer) + node.add_bias(bias_new, quantizer=b_quantizer) model.remove_node(prev_node, rewire=True) return True diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index 1f2b231f20..ae8851c834 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -1,31 +1,69 @@ +import numpy as np + from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.layers import BatchNormalization, Dense, Conv1D, Conv2D class FuseBatchNormalization(OptimizerPass): def match(self, node): - is_match = isinstance(node, BatchNormalization) and \ - isinstance(node.get_input_node(), (Dense, Conv1D, Conv2D)) and \ - node.get_input_node().get_attr('weight_quantizer') is None and \ - node.get_input_node().get_attr('bias_quantizer') is None - return is_match + prev_node = node.get_input_node(node.inputs[0]) + basic_match = (isinstance(node, BatchNormalization) + and isinstance(prev_node, (Dense, Conv1D, Conv2D)) + and not prev_node.get_attr("quant_precision")) + + if basic_match: + s0 = prev_node.weights['weight'].data_unquantized + b0 = prev_node.weights['bias'].data_unquantized + s1 = node.weights['scale'].data_unquantized + b1 = node.weights['bias'].data_unquantized + scale_compatible = ( + (prev_node.get_attr("weight_quantizer") is None + and node.get_attr("scale_quantizer") is None) + or (s0 == np.ones_like(s0)).all() + or (s1 == np.ones_like(s1)).all()) + bias_compatible = ( + (prev_node.get_attr("bias_quantizer") is None + and node.get_attr("bias_quantizer") is None) + or (b0 == np.zeros_like(b0)).all() + or (b1 == np.zeros_like(b1)).all()) + return scale_compatible and bias_compatible + else: + return False + def transform(self, model, node): - # Fuse weight and bias of Dense/Conv1D/Conv2D layer with BN values - parent_node = node.get_input_node() + """ Fuse weight and bias of Dense/Conv1D/Conv2D layer with BN values + """ + + # copying much of the logic from FuseConsecutiveBatchNormalization + # (hence weight = scale in the variable names) - parent_weight = parent_node.weights['weight'] - parent_bias = parent_node.weights['bias'] + prev_node = node.get_input_node(node.inputs[0]) - bn_scale = node.weights['scale'] - bn_bias = 
node.weights['bias'] + s0 = prev_node.weights['weight'].data_unquantized + b0 = prev_node.weights['bias'].data_unquantized + s1 = node.weights['scale'].data_unquantized + b1 = node.weights['bias'].data_unquantized - fused_weight = bn_scale.data * parent_weight.data - fused_bias = bn_scale.data * parent_bias.data + bn_bias.data + s_quantizer = (node.get_attr("scale_quantizer") if (s0 == np.ones_like(s0)).all() + else prev_node.get_attr("weight_quantizer")) + b_quantizer = (node.get_attr("bias_quantizer") if (b0 == np.zeros_like(b0)).all() + else prev_node.get_attr("bias_quantizer")) + + prev_node.set_attr("weight_quantizer", s_quantizer) + prev_node.set_attr("bias_quantizer", b_quantizer) + if s_quantizer: + prev_node.set_attr("weight_precision", s_quantizer.hls_type) + if b_quantizer: + prev_node.set_attr("bias_precision", b_quantizer.hls_type) + + scale_new = s0 * s1 + bias_new = s1 * b0 + b1 + + prev_node.set_attr("quant_precision", node.get_attr("quant_precision")) model.remove_node(node, rewire=True) - parent_weight.data = fused_weight - parent_bias.data = fused_bias - if not parent_node.get_attr('use_bias', True): - parent_bias.update_precision(bn_bias.type.precision) + + prev_node.add_weights_variable(name='weight', var_name='w{index}', data=scale_new, quantizer=s_quantizer) + prev_node.add_weights_variable(name='bias', var_name='b{index}', data=bias_new, quantizer=b_quantizer) return True diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index 9caff14d5d..aba1cf9efd 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -1,9 +1,7 @@ import numpy as np -import math # prefer to use math.ceil for scalar values (returns int) from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import IntegerPrecisionType, FixedPrecisionType +from hls4ml.model.types import IntegerPrecisionType from hls4ml.model.layers import Conv, Constant, Conv1D, Conv2D -from numbers import Integral _base_attributes = ('Trace', 'reuse_factor', 'in_width', 'out_width', 'n_chan', 'n_filt', 'pad_left', 'pad_right', 'filt_width', 'stride_width', 'dilation_width', 'in_height', 'out_height', 'pad_top', 'pad_bottom', @@ -23,8 +21,6 @@ def match(self, node): def transform(self, model, node): """ Convert Conv with constant to a Conv1D or Conv2D layer """ - input_node = node.get_input_node(node.inputs[0]) - input_precision = input_node.get_attr("quant_precision") weight_node = node.get_input_node(node.inputs[1]) weight_precision = weight_node.get_attr("quant_precision") bias_node = None @@ -33,16 +29,8 @@ def transform(self, model, node): bias_node = node.get_input_node(node.inputs[2]) bias_precision = bias_node.get_attr("quant_precision") - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - - quant_precision = None - - if weight_precision and input_precision and (bias_precision or not bias_node): - quant_precision = propagate_type_conv(input_precision, weight_precision, bias_precision, - num_feature_maps=weight_node.value.shape[0], filt_width=attributes['filt_width'], - filt_height=attributes.get('filt_height', 1)) - #creating the attributes + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) if node.attributes['n_dim'] == 1: @@ -53,8 +41,7 @@ def transform(self, model, node): attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 3, 0)) 
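A small sketch of the kernel re-layout performed just above, assuming a hypothetical ONNX-style kernel of shape (n_filt, filt_height, filt_width, n_chan); the shapes are made up for illustration:

import numpy as np

w_onnx = np.zeros((8, 3, 3, 4))              # hypothetical (M, kH, kW, C) kernel from the Conv node
w_hls = np.transpose(w_onnx, (1, 2, 3, 0))   # re-layout to (kH, kW, C, M) expected by the Conv2D layer
assert w_hls.shape == (3, 3, 4, 8)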
attributes["weight_precision"] = weight_precision attributes["weight_quantizer"] = weight_node.get_attr("quantizer") - attributes["quant_precision"] = quant_precision - + if bias_node: attributes["bias_data"] = bias_node.value, attributes["bias_precision"] = bias_precision, @@ -74,31 +61,3 @@ def transform(self, model, node): model.replace_node(node, new_node) return True - -def propagate_type_conv(input_precision: FixedPrecisionType, weight_precision: FixedPrecisionType, bias_precision: FixedPrecisionType, - num_feature_maps: Integral, filt_width: Integral, filt_height: Integral): - ''' - Propagate the precion type across a multiply. Currently only "quant_precision" types (with no fractional bits) - are supported. Rounding modes are propagated from in1 - ''' - if input_precision and weight_precision: - if (weight_precision.width != weight_precision.integer - or input_precision.width != input_precision.integer): - raise ValueError("quant_precisions must always have the same width and integer parameters") - - Nacc = filt_width * filt_height * num_feature_maps - bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(Nacc)) - signed = weight_precision.signed or input_precision.signed - # copy staruation and rounding from input - rounding_mode = input_precision.rounding_mode - saturation_mode = input_precision.saturation_mode - - # correct if bias - if bias_precision: - bitwidth = max(bitwidth + (bias_precision.signed and not signed), - bias_precision.width + (signed and not bias_precision.signed)) + 1 - signed = signed or bias_precision.signed - return FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) - - else: - return None diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index dc12b19e1b..43f5c14f4f 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -1,9 +1,7 @@ import numpy as np -import math # prefer to use math.ceil for scalar values (returns int) from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.types import IntegerPrecisionType, NamedType, FixedPrecisionType from hls4ml.model.layers import MatMul, Constant, Dense -from numbers import Integral _base_attributes = ('Trace', 'reuse_factor', 'weight', 'weight_t', 'bias', 'bias_t') @@ -25,18 +23,14 @@ def transform(self, model, node): other_node = node.get_input_node(node.inputs[0]) other_var = node.get_input_variable(node.inputs[0]) - quant_precision = None weight_precision = const_node.get_attr("quant_precision") weight_quantizer = const_node.get_attr("quantizer") - other_precision = other_node.get_attr("quant_precision") in_shape = other_var.shape n_in = np.prod(in_shape) out_shape = list(in_shape[:-1]) + [const_node.value.shape[-1]] n_out = np.prod(out_shape) - quant_precision = propagate_type_mult(other_precision, weight_precision, in_shape[-1]) - #creating the attributes attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update({ @@ -45,7 +39,6 @@ def transform(self, model, node): "weight_quantizer": weight_quantizer, "bias_data": np.zeros(out_shape), "bias_precision": IntegerPrecisionType(1, False), - "quant_precision": quant_precision, "n_in": n_in, "n_out": n_out }) @@ -54,31 +47,8 @@ def transform(self, model, node): new_dense = model.make_node(Dense, f"Dense_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) - if quant_precision: - accum_t = 
NamedType('layer{}_accum_t'.format(new_dense.index), quant_precision) - new_dense.set_attr('accum_t', accum_t) - #removing and replacing old nodes model.remove_node(const_node, rewire=False) model.replace_node(node, new_dense) return True - -def propagate_type_mult(in1: FixedPrecisionType, in2: FixedPrecisionType, num_acc: Integral): - ''' - Propagate the precion type across a multiply. Currently only "quant_precision" types (with no fractional bits) - are supported. Rounding modes are propagated from in1 - ''' - if in2 and in1: - if (in2.width != in2.integer - or in1.width != in1.integer): - raise ValueError("quant_precisions must always have the same width and integer parameters") - - bitwidth = in2.width + in1.width + math.ceil(np.log2(num_acc)) - signed = in2.signed or in1.signed - # copy staruation and rounding from "in1" - rounding_mode = in1.rounding_mode - saturation_mode = in1.saturation_mode - return FixedPrecisionType(bitwidth, bitwidth, signed, rounding_mode, saturation_mode) - else: - return None diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index f8a3d04e7a..773507caa2 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -1,6 +1,7 @@ import numpy as np from hls4ml.model.layers import Merge, Constant, BatchNormalization from hls4ml.model.optimizer import OptimizerPass +from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer _base_attributes = ('Trace', 'reuse_factor', 'n_in') @@ -87,11 +88,17 @@ def transform(self, model, node): input_shape = node.get_input_variable(node.inputs[input_node_idx]).shape n_in = np.prod(input_shape) + scale_precision = None + scale_quantizer = None + bias_precision = None + bias_quantizer = None op = node.attributes["op"] if op in ('add', 'sum'): scale = np.array(1) bias = const_node.value + bias_precision = const_node.get_attr("quant_precision") + bias_quantizer = const_node.get_attr("quantizer") elif op == 'sub': if node1const: scale = np.array(1) @@ -99,10 +106,20 @@ def transform(self, model, node): else: scale = np.array(-1) bias = const_node.value + bias_precision = const_node.get_attr("quant_precision") + bias_quantizer = const_node.get_attr("quantizer") + if bias_precision and not bias_precision.signed: + # need to add a bit + bias_precision.signed = 1 + bias_precision.width += 1 + bias_precision.integer += 1 + bias_quantizer = QuantNodeQuantizer(bias_precision) elif op == 'mul': scale = const_node.value bias = np.array(0) + scale_precision = const_node.get_attr("quant_precision") + scale_quantizer = const_node.get_attr("quantizer") attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update({ @@ -110,7 +127,11 @@ def transform(self, model, node): "bias_data": bias, "n_in": n_in, "n_out": n_in, - "n_filt": -1 + "n_filt": -1, + "scale_precision": scale_precision, + "scale_quantizer": scale_quantizer, + "bias_precision": bias_precision, + "bias_quantizer": bias_quantizer }) bn_layer = model.make_node(BatchNormalization, f"bn_{node.name}", @@ -123,7 +144,11 @@ def transform(self, model, node): return True class MergeToBatchNormalizationDiv(OptimizerPass): - """ Convert Add, Sub, Mul, or Div Merges with consant to BatchNormalization """ + """ + Convert Div Merges with consant to BatchNormalization + + TODO: propagate precision + """ def match(self, node): is_match = (isinstance(node, Merge) and node.attributes["op"] == 'div' diff --git 
a/hls4ml/model/optimizer/passes/propagate_conv_precision.py b/hls4ml/model/optimizer/passes/propagate_conv_precision.py new file mode 100644 index 0000000000..236a1d23c0 --- /dev/null +++ b/hls4ml/model/optimizer/passes/propagate_conv_precision.py @@ -0,0 +1,65 @@ +import numpy as np +import math # prefer to use math.ceil for scalar values (returns int) +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import NamedType, FixedPrecisionType +from hls4ml.model.layers import Conv1D, Conv2D +from numbers import Integral + +class PropagateConvPrecision(OptimizerPass): + """ Propagate precision for conv nodes. Restrict it to only cases where + the precision is set by a quant node, since otherwise the values get huge. + """ + def match(self, node): + is_match = isinstance(node, (Conv1D, Conv2D)) + return is_match + + def transform(self, model, node): + + input_precision = node.get_input_node().get_attr("quant_precision") + weight_precision = node.get_attr("weight_precision") + if not input_precision or not weight_precision: + return False + + bias_precision = node.get_attr("bias_precision") + num_feature_maps = node.weights['weight'].data_unquantized.shape[-1] + filt_width = node.get_attr('filt_width') + filt_height = node.get_attr('filt_height', 1) + + accum_precision = _propagate_type_conv( + input_precision, weight_precision, bias_precision, + num_feature_maps=num_feature_maps, filt_width=filt_width, + filt_height=filt_height) + + accum_t = NamedType('layer{}_accum_t'.format(node.index), accum_precision) + node.set_attr('accum_t', accum_t) + + if not node.get_attr("quant_precision"): + # output precision not explicitly set by quant node + node.update_output_precision(accum_precision) + + return False + +def _propagate_type_conv(input_precision, weight_precision, bias_precision, + num_feature_maps, filt_width, filt_height): + ''' + Propagate the precion type across a multiply. Rounding modes are propagated from input_precision + ''' + + Nacc = filt_width * filt_height * num_feature_maps + bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(Nacc)) + integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(Nacc)) + signed = weight_precision.signed or input_precision.signed + # copy staruation and rounding from input + rounding_mode = input_precision.rounding_mode + saturation_mode = input_precision.saturation_mode + + frac = bitwidth - integer + + # correct for bias + if bias_precision: + integer = max(integer + (bias_precision.signed and not signed), + bias_precision.integer + (signed and not bias_precision.signed)) + 1 + bitwidth = integer + max(frac, bias_precision.width - bias_precision.integer) + signed = signed or bias_precision.signed + + return FixedPrecisionType(bitwidth, integer, signed, rounding_mode, saturation_mode) diff --git a/hls4ml/model/optimizer/passes/propagate_dense_precision.py b/hls4ml/model/optimizer/passes/propagate_dense_precision.py new file mode 100644 index 0000000000..381844e289 --- /dev/null +++ b/hls4ml/model/optimizer/passes/propagate_dense_precision.py @@ -0,0 +1,61 @@ +import numpy as np +import math # prefer to use math.ceil for scalar values (returns int) +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import NamedType, FixedPrecisionType +from hls4ml.model.layers import Dense + + +class PropagateDensePrecision(OptimizerPass): + """ + Propagate precision for Dense nodes. 
Restrict it to only cases where + the precision is set by a quant node, since otherwise the values get huge. + """ + def match(self, node): + is_match = isinstance(node, Dense) + return is_match + + def transform(self, model, node): + + input_precision = node.get_input_node().get_attr("quant_precision") + weight_precision = node.get_attr("weight_precision") + if not input_precision or not weight_precision: + return False + + bias_precision = node.get_attr("bias_precision") + input_variable = node.get_input_variable() + num_acc = input_variable.shape[-1] + + accum_precision = _propagate_type_dense(input_precision, weight_precision, bias_precision, num_acc) + + accum_t = NamedType('layer{}_accum_t'.format(node.index), accum_precision) + node.set_attr('accum_t', accum_t) + + if not node.get_attr("quant_precision"): + # output precision not set by quant node + node.update_output_precision(accum_precision) + + return False + +def _propagate_type_dense(input_precision, weight_precision, bias_precision, num_acc): + ''' + Propagate the precion type across a multiply. Rounding modes are propagated from input_precision + ''' + + # check to make sure none are None + bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(num_acc)) + integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(num_acc)) + signed = weight_precision.signed or input_precision.signed + # copy staruation and rounding from "input_precision" + rounding_mode = input_precision.rounding_mode + saturation_mode = input_precision.saturation_mode + + frac = bitwidth - integer + + # correct for bias + if bias_precision: + integer = max(integer + (bias_precision.signed and not signed), + bias_precision.integer + (signed and not bias_precision.signed)) + 1 + bitwidth = integer + max(frac, bias_precision.width - bias_precision.integer) + signed = signed or bias_precision.signed + + return FixedPrecisionType(bitwidth, integer, signed, rounding_mode, saturation_mode) diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index d09cf5ffe7..127368809c 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -10,6 +10,10 @@ Linear node to apply the quantization, and another ApplyAlpha to unscale/shift. We depend on optimization steps to move the unscaling ApplyAlpha down as needed. Again, when the Quant is a applied ot a Constant, the scaling and Linear nodes are immediately merged into the Constant. This is done because it simplifies some of the other optimizations. + +UPDATE: Case 1 is loosened to also include power of 2 scalar scales, not just unitary scale, if + _ALSO_INCLUDE_PO2 is set to true (the default) + ''' from copy import deepcopy import numpy as np @@ -19,6 +23,8 @@ from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer from hls4ml.model.optimizer import OptimizerPass +_ALSO_MATCH_PO2 = True + _base_attributes = ('Trace', 'reuse_factor') class QuantConstantParameters(OptimizerPass): @@ -70,9 +76,12 @@ class QuantToActivation(OptimizerPass): a Quant to an Activation. As an optimization, this is not called when the input is constant. + + UPDATE: this is also called when scale is scalar and power of 2, not just 1. 
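    For example, a hypothetical scalar scale of 0.25 (= 2**-2) gives np.frexp(0.25) == (0.5, -1):
    the 0.5 mantissa passes the power-of-two check in match(), and the exponent then shifts the
    integer width of the derived fixed-point type in transform() via integer = bitwidth + exp - 1.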
''' def match(self, node): # only matches after the other inputs are already folded + is_match = (isinstance(node, Quant) and not isinstance(node.get_input_node(node.inputs[0]), Constant) and not node.get_input_node(node.inputs[1]) @@ -81,11 +90,21 @@ def match(self, node): # Only match if the scale is 1s and the zero-point is 0s if is_match: # to make sure this is a quant node with inputs - input_shape = node.get_input_variable().shape scale = node.get_attr("scale") bias = node.get_attr("zeropt") - is_match = is_match and (scale == np.ones_like(scale)).all() is_match = is_match and (bias == np.zeros_like(bias)).all() + + # check if scale is ones-like or a power of two + scale_unit_or_po2 = (scale == np.ones_like(scale)).all() + if not scale_unit_or_po2 and _ALSO_MATCH_PO2: + sqscale = np.squeeze(scale) + if not sqscale.shape: + # not an array + mantissa, _ = np.frexp(sqscale) + scale_unit_or_po2 = mantissa == 0.5 + + is_match = is_match and scale_unit_or_po2 + return is_match def transform(self, model, node): @@ -100,8 +119,13 @@ def transform(self, model, node): narrow = node.get_attr("narrow") signed = node.get_attr("signed") bitwidth = node.get_attr("bitwidth") + integer = bitwidth + scale = node.get_attr("scale") + if _ALSO_MATCH_PO2 and not (scale == np.ones_like(scale)).all(): + _, exp = np.frexp(np.squeeze(scale)) + integer = bitwidth + exp - 1 - precision, quantizer = _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode) + precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update({ @@ -122,6 +146,7 @@ def transform(self, model, node): class FuseQuantWithConstant(OptimizerPass): ''' This is for the case when scale is 1 and zeropt is 0. It directly applies the quantization to a constant. + UPDATE: this is also called when scale is scalar and power of 2, not just 1. 
''' def match(self, node): # only matches after the other inputs are already folded @@ -136,8 +161,19 @@ def match(self, node): input_shape = node.get_input_variable().shape scale = node.get_attr("scale") bias = node.get_attr("zeropt") - is_match = is_match and (scale == np.ones_like(scale)).all() is_match = is_match and (bias == np.zeros_like(bias)).all() + + # check if scale is ones-like or a power of two + scale_unit_or_po2 = (scale == np.ones_like(scale)).all() + if not scale_unit_or_po2 and _ALSO_MATCH_PO2: + sqscale = np.squeeze(scale) + if not sqscale.shape: + # not an array + mantissa, _ = np.frexp(sqscale) + scale_unit_or_po2 = mantissa == 0.5 + + is_match = is_match and scale_unit_or_po2 + return is_match def transform(self, model, node): @@ -149,8 +185,13 @@ def transform(self, model, node): narrow = node.get_attr("narrow") signed = node.get_attr("signed") bitwidth = node.get_attr("bitwidth") + integer = bitwidth + scale = node.get_attr("scale") + if _ALSO_MATCH_PO2 and not (scale == np.ones_like(scale)).all(): + _, exp = np.frexp(np.squeeze(scale)) + integer = bitwidth + exp - 1 - precision, quantizer = _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode) + precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) const_node = node.get_input_node(node.inputs[0]) const_node.set_attr("quant_precision", precision) @@ -203,7 +244,7 @@ def transform(self, model, node): signed = node.get_attr("signed") bitwidth = node.get_attr("bitwidth") - precision, quantizer = _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode) + precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update({ @@ -294,7 +335,7 @@ def transform(self, model, node): signed = node.get_attr("signed") bitwidth = node.get_attr("bitwidth") - precision, quantizer = _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode) + precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) const_node = node.get_input_node(node.inputs[0]) @@ -329,7 +370,7 @@ def transform(self, model, node): return True -def _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode): +def _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode): ''' A function to determine the precision and quantizer ''' @@ -349,7 +390,8 @@ def _calculate_precision_quantizer(bitwidth, signed, narrow, rounding_mode): bn_sat = "AP_SAT" bitwidth = math.ceil(bitwidth) + integer = math.ceil(integer) - precision = FixedPrecisionType(bitwidth, bitwidth, signed, bn_round, bn_sat) + precision = FixedPrecisionType(bitwidth, integer, signed, bn_round, bn_sat) quantizer = QuantNodeQuantizer(precision) return (precision, quantizer) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 6ac9f9720f..885aae61d6 100755 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -93,5 +93,43 @@ def test_cnv_2w2a(): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) +def test_jet_tagging(): + # download test model + dl_dir = "./" + dl_file = dl_dir + "qkeras_jettagging.onnx" + jet_tagging_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/JetTagging/QKeras_hls4ml_3layer/qkeras_jettagging.onnx" + ) + urllib.request.urlretrieve(jet_tagging_qonnx_url, dl_file) + assert 
os.path.isfile(dl_file) + out_file = dl_dir + "/qkeras_jettagging-clean.onnx" + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1,16) + np.random.seed(0) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + + hls_model = hls4ml.converters.convert_from_onnx_model(model, + output_dir='hls4mlprj_qonnx_jettag', + part='xcu250-figd2104-2L-e', + hls_config=config) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + if __name__ == '__main__': test_tfc_2w2a() From 16f4765e95c8c7473a384b95bd0a0bd13f814f03 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 11 May 2022 16:08:08 -0500 Subject: [PATCH 21/51] make model.output consistent in taking variable name in all cases, not sometimes layer name --- .../quartus/passes/transform_types.py | 4 +- hls4ml/model/graph.py | 46 ++++++++++++------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/hls4ml/backends/quartus/passes/transform_types.py b/hls4ml/backends/quartus/passes/transform_types.py index d1d7b01693..38ead16dba 100644 --- a/hls4ml/backends/quartus/passes/transform_types.py +++ b/hls4ml/backends/quartus/passes/transform_types.py @@ -19,9 +19,9 @@ def transform(self, model, node): if io_type == 'io_stream': raise Exception('Streaming IO is not supported in Quartus.') elif io_type == 'io_parallel': - if node.name in node.model.inputs: + if out_name in node.model.inputs: new_var = self.struct_var_converter.convert(var, pragma='hls_register', struct_name='inputs') - elif node.name in node.model.outputs: + elif out_name in node.model.outputs: new_var = self.struct_var_converter.convert(var, pragma='hls_register', struct_name='outputs') else: new_var = self.array_var_converter.convert(var, pragma='hls_register') diff --git a/hls4ml/model/graph.py b/hls4ml/model/graph.py index 8434f27992..fd8738868d 100644 --- a/hls4ml/model/graph.py +++ b/hls4ml/model/graph.py @@ -136,7 +136,7 @@ def get_target_cycles(self, layer): targ_cycles = self.layer_name_targ_cycles.get(layer.__class__.__name__.lower()) if targ_cycles is None: targ_cycles = self.model_targ_cycles - + return targ_cycles def get_strategy(self, layer): @@ -147,7 +147,7 @@ def get_strategy(self, layer): strategy = self.model_strategy return strategy - + def get_conv_implementation(self, layer): conv_implementation = self.layer_name_conv_implementation.get(layer.name.lower()) if conv_implementation is None: @@ -171,7 +171,7 @@ def get_compression(self, layer): def _parse_hls_config(self): hls_config = self.config['HLSConfig'] - + self.flows = hls_config.get('Flows') if self.flows is None: self.flows = [self.backend.get_default_flow()] @@ -187,9 +187,9 @@ def _parse_hls_config(self): try: selected_optimizers.remove(opt) except ValueError: - pass + pass self.optimizers = selected_optimizers - + model_cfg = hls_config.get('Model') if model_cfg is not None: precision_cfg = model_cfg.get('Precision') @@ -220,7 +220,7 @@ def _parse_hls_config(self): rf = layer_cfg.get('ReuseFactor') if rf is 
not None: self.layer_type_rf[layer_type.lower()] = rf - + targ_cycles = layer_cfg.get('TargetCycles') if targ_cycles is not None: self.layer_type_targ_cycles[layer_type.lower()] = targ_cycles @@ -303,9 +303,15 @@ def __init__(self, config, data_reader, layer_list, inputs=None, outputs=None): self._applied_flows = [] - # If not provided, assumes layer_list[0] is input, and layer_list[-1] is output - self.inputs = inputs if inputs is not None else [layer_list[0]['name']] - self.outputs = outputs if outputs is not None else [layer_list[-1]['name']] + # If not provided, assumes layer_list[0] is the input layer, and layer_list[-1] is output layer + + # Note, these are actually the variable names, which may differ from the layer name + input_layers = inputs if inputs is not None else [layer_list[0]['name']] + output_layers = outputs if outputs is not None else [layer_list[-1]['name']] + self.inputs = self._find_variables(layer_list, input_layers) + if self.inputs != input_layers: + raise RuntimeError(f"Currently only support the case when input variables and input layer names match\nInput layers = {input_layers}, input_vars = {self.inputs}") + self.outputs = self._find_variables(layer_list, output_layers) self.index = 0 self.graph = OrderedDict() @@ -318,6 +324,12 @@ def __init__(self, config, data_reader, layer_list, inputs=None, outputs=None): for flow in self.config.flows: self.apply_flow(flow) + @staticmethod + def _find_variables(layer_list, layers): + fullnodes = [node for node in layer_list if node['name'] in layers] + out_list_lists = [node['outputs'] if 'outputs' in node else [node['name']] for node in fullnodes] + return [item for sublist in out_list_lists for item in sublist] # to flatten + def _make_graph(self, layer_list): for layer in layer_list: kind = layer['class_name'] @@ -357,7 +369,7 @@ def make_node(self, kind, name, attributes, inputs, outputs=None): """ Make a new node not connected to the model graph. The 'kind' should be a valid layer registered with `register_layer`. If no outputs - are specified, a default output named the same as the node will be created. The + are specified, a default output named the same as the node will be created. The returned node should be added to the graph with `insert_node` or `replace_node` functions. @@ -398,13 +410,13 @@ def make_node(self, kind, name, attributes, inputs, outputs=None): def insert_node(self, node, before=None): """ Insert a new node into the model graph. - The node to be inserted should be created with `make_node()` function. The optional + The node to be inserted should be created with `make_node()` function. The optional parameter `before` can be used to specify the node that follows in case of ambiguities. Args: node (Layer): Node to insert before (Layer, optional): The next node in sequence before which a - new node should be inserted. + new node should be inserted. Raises: Exception: If an attempt to insert a node with multiple inputs is made or if `before` does not specify a correct node in sequence. 
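A minimal illustration of the variable lookup above, using hypothetical layer names:

    layer_list = [
        {'name': 'input_1'},                           # no explicit 'outputs': falls back to the layer name
        {'name': 'dense', 'outputs': ['dense_out']},
    ]
    # _find_variables(layer_list, ['input_1', 'dense']) returns ['input_1', 'dense_out'],
    # i.e. the per-layer output lists flattened into a single list of variable names.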
@@ -468,7 +480,7 @@ def remove_node(self, node, rewire=True): raise Exception('Cannot rewire a node without child') else: raise Exception('Cannot rewire a node without a parent') - + del self.output_vars[node.outputs[0]] del self.graph[node.name] self._update_model_outputs() @@ -538,7 +550,7 @@ def get_weight_variables(self): for layer in self.get_layers(): weights = layer.get_weights() variables.extend(weights) - + return variables def write(self): @@ -555,7 +567,7 @@ def _make_stamp(): from random import choice length = 8 return ''.join(choice(hexdigits) for m in range(length)) - + self.config.config['Stamp'] = _make_stamp() self.config.backend.write(self) @@ -585,9 +597,9 @@ def _get_top_function(self, x): raise Exception('Model not compiled') if len(self.get_input_variables()) == 1: xlist = [x] - else: + else: xlist = x - + for xi in xlist: if not isinstance(xi, np.ndarray): raise Exception('Expected numpy.ndarray, but got {}'.format(type(x))) From 83c627df49386a1e4398b8ccef7f313b9a52e1cd Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 15 May 2022 19:54:57 -0500 Subject: [PATCH 22/51] snapshot of working towards quartus fix --- .../quartus/passes/transform_types.py | 8 +++- hls4ml/backends/quartus/quartus_backend.py | 6 ++- .../backends/vivado/passes/inplace_reshape.py | 36 ---------------- test/pytest/test_qonnx.py | 42 ++++++++++++++++++- 4 files changed, 52 insertions(+), 40 deletions(-) delete mode 100644 hls4ml/backends/vivado/passes/inplace_reshape.py diff --git a/hls4ml/backends/quartus/passes/transform_types.py b/hls4ml/backends/quartus/passes/transform_types.py index 38ead16dba..5a4d32b2e2 100644 --- a/hls4ml/backends/quartus/passes/transform_types.py +++ b/hls4ml/backends/quartus/passes/transform_types.py @@ -1,14 +1,16 @@ +from numpy import isin from hls4ml.model.optimizer import GlobalOptimizerPass from hls4ml.backends.fpga.fpga_types import ( - ACTypeConverter, QuartusArrayVariableConverter, HLSTypeConverter, + ACTypeConverter, HLSTypeConverter, QuartusArrayVariableConverter, QuartusInplaceArrayVariableConverter, QuartusStructMemberVariableConverter, StaticWeightVariableConverter) - +from hls4ml.model.types import InplaceTensorVariable class TransformTypes(GlobalOptimizerPass): def __init__(self): self.type_converter = HLSTypeConverter(precision_converter=ACTypeConverter()) self.array_var_converter = QuartusArrayVariableConverter(type_converter=self.type_converter) + self.inplace_array_var_converter = QuartusInplaceArrayVariableConverter(type_converter=self.type_converter) self.struct_var_converter = QuartusStructMemberVariableConverter(type_converter=self.type_converter) self.weight_var_converter = StaticWeightVariableConverter(type_converter=self.type_converter) @@ -23,6 +25,8 @@ def transform(self, model, node): new_var = self.struct_var_converter.convert(var, pragma='hls_register', struct_name='inputs') elif out_name in node.model.outputs: new_var = self.struct_var_converter.convert(var, pragma='hls_register', struct_name='outputs') + elif isinstance(var, InplaceTensorVariable): + new_var = self.inplace_array_var_converter.convert(var, pragma='') else: new_var = self.array_var_converter.convert(var, pragma='hls_register') else: diff --git a/hls4ml/backends/quartus/quartus_backend.py b/hls4ml/backends/quartus/quartus_backend.py index 4ee6781f82..e3ab32b4a2 100644 --- a/hls4ml/backends/quartus/quartus_backend.py +++ b/hls4ml/backends/quartus/quartus_backend.py @@ -46,6 +46,10 @@ def _register_flows(self): ] quantization_flow = register_flow('quantization', 
quantization_passes, requires=[init_flow], backend=self.name) + optimization_passes = [ + 'quartus:inplace_parallel_reshape', + ] + optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name) templates = self._get_layer_templates() template_flow = register_flow('apply_templates', templates, requires=[init_flow], backend=self.name) @@ -68,7 +72,7 @@ def _register_flows(self): else: extras_flow = None - ip_flow_requirements = ['optimize', init_flow, quantization_flow, quartus_types_flow, extras_flow, template_flow] + ip_flow_requirements = ['optimize', init_flow, quantization_flow, optimization_flow, quartus_types_flow, extras_flow, template_flow] ip_flow_requirements = list(filter(None, ip_flow_requirements)) self._default_flow = register_flow('ip', None, requires=ip_flow_requirements, backend=self.name) diff --git a/hls4ml/backends/vivado/passes/inplace_reshape.py b/hls4ml/backends/vivado/passes/inplace_reshape.py deleted file mode 100644 index 557974141c..0000000000 --- a/hls4ml/backends/vivado/passes/inplace_reshape.py +++ /dev/null @@ -1,36 +0,0 @@ -from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.layers import Reshape -from hls4ml.model.types import InplaceTensorVariable - -class InplaceParallelReshape(OptimizerPass): - """ - Because in io_parallel arrays are stored 1D, reshape produces no code - """ - def match(self, node): - return isinstance(node, Reshape) - - def transform(self, model, node): - if model.config.get_config_value('IOType') != 'io_parallel': - return False - - outvar = node.get_output_variable() - invar = node.get_input_variable(node.inputs[0]) - newoutvar = InplaceTensorVariable(outvar, invar) - node.set_attr(node.outputs[0], newoutvar) - return False - -class InplaceStreamFlatten(OptimizerPass): - ''' Remove Flatten layer in io_stream ''' - def match(self, node): - # optimizer pass for a flatten layer (1 output dimension) - return isinstance(node, Reshape) and len(node.get_output_variable().shape) == 1 - - def transform(self, model, node): - if model.config.get_config_value('IOType') != 'io_stream': - return False - - outvar = node.get_output_variable() - invar = node.get_input_variable(node.inputs[0]) - newoutvar = InplaceTensorVariable(outvar, invar) - node.set_attr(node.outputs[0], newoutvar) - return False diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 885aae61d6..75970bb0d6 100755 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -49,6 +49,46 @@ def test_tfc_2w2a(): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) +def test_tfc_2w2a_quartus(): + # download test model + dl_dir = "./" + dl_file = dl_dir + "qonnx-tfc-2w2a.onnx" + tfc_w2a2_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/MNIST/Brevitas_FINN_TFC/TFC/TFC_2W2A.onnx" + ) + urllib.request.urlretrieve(tfc_w2a2_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_file = dl_dir + "/qonnx-tfc-2w2a-clean.onnx" + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1,1,28,28) + np.random.seed(0) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = 
hls4ml.utils.config_from_onnx_model(model) + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + config['LayerName'] = {} + config['LayerName']['global_in'] = {'Precision' : 'ac_fixed<16,2>'} + hls_model = hls4ml.converters.convert_from_onnx_model(model, + output_dir='hls4mlprj_qonnx_tfc-2w2a-quartus', + part='Arria10', + backend='Quartus', + hls_config=config) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + def test_cnv_2w2a(): # download test model dl_dir = "./" @@ -132,4 +172,4 @@ def test_jet_tagging(): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) if __name__ == '__main__': - test_tfc_2w2a() + test_tfc_2w2a_quartus() From 5ae179ac94c908056435f68cb27fa40ec6d48e60 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 16 May 2022 09:50:21 -0500 Subject: [PATCH 23/51] Work around apparent mac clang bug --- hls4ml/templates/quartus/build_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/templates/quartus/build_lib.sh b/hls4ml/templates/quartus/build_lib.sh index 5310170389..ace25ea1dc 100755 --- a/hls4ml/templates/quartus/build_lib.sh +++ b/hls4ml/templates/quartus/build_lib.sh @@ -4,7 +4,7 @@ CC=g++ if [[ "$OSTYPE" == "linux-gnu" ]]; then CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" elif [[ "$OSTYPE" == "darwin"* ]]; then - CFLAGS="-O3 -fPIC -std=c++11" + CFLAGS="-O -fPIC -std=c++11" fi LDFLAGS= INCFLAGS="-Ifirmware/ac_types/ -Ifirmware/ap_types/" From 46c3f6b9581ab21dd50d0dfd3f239fd171555eeb Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 17 May 2022 11:42:58 -0500 Subject: [PATCH 24/51] Add passes that were forgotten in previous commits --- .../fpga/passes/inplace_parallel_reshape.py | 20 +++++++++++++++++++ .../vivado/passes/inplace_stream_flatten.py | 20 +++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 hls4ml/backends/fpga/passes/inplace_parallel_reshape.py create mode 100644 hls4ml/backends/vivado/passes/inplace_stream_flatten.py diff --git a/hls4ml/backends/fpga/passes/inplace_parallel_reshape.py b/hls4ml/backends/fpga/passes/inplace_parallel_reshape.py new file mode 100644 index 0000000000..fd5cf45be7 --- /dev/null +++ b/hls4ml/backends/fpga/passes/inplace_parallel_reshape.py @@ -0,0 +1,20 @@ +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.layers import Reshape +from hls4ml.model.types import InplaceTensorVariable + +class InplaceParallelReshape(OptimizerPass): + """ + Because in io_parallel arrays are stored 1D, reshape produces no code + """ + def match(self, node): + return isinstance(node, Reshape) + + def transform(self, model, node): + if model.config.get_config_value('IOType') != 'io_parallel': + return False + + outvar = node.get_output_variable() + invar = node.get_input_variable(node.inputs[0]) + newoutvar = InplaceTensorVariable(outvar, invar) + node.set_attr(node.outputs[0], newoutvar) + return False diff --git a/hls4ml/backends/vivado/passes/inplace_stream_flatten.py b/hls4ml/backends/vivado/passes/inplace_stream_flatten.py new file mode 100644 index 0000000000..7ab44927e6 --- /dev/null +++ b/hls4ml/backends/vivado/passes/inplace_stream_flatten.py @@ -0,0 +1,20 @@ +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.layers import Reshape +from hls4ml.model.types import InplaceTensorVariable + + +class InplaceStreamFlatten(OptimizerPass): + ''' Remove Flatten layer in io_stream 
''' + def match(self, node): + # optimizer pass for a flatten layer (1 output dimension) + return isinstance(node, Reshape) and len(node.get_output_variable().shape) == 1 + + def transform(self, model, node): + if model.config.get_config_value('IOType') != 'io_stream': + return False + + outvar = node.get_output_variable() + invar = node.get_input_variable(node.inputs[0]) + newoutvar = InplaceTensorVariable(outvar, invar) + node.set_attr(node.outputs[0], newoutvar) + return False From fa5aba0f1100b6ce306dd157ad841aa0692502d3 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 1 Jun 2022 11:31:01 -0500 Subject: [PATCH 25/51] add more quartus qonnx tests--should maybe parametrize in the future --- test/pytest/test_qonnx.py | 40 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 75970bb0d6..a3ec7d053c 100755 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -171,5 +171,45 @@ def test_jet_tagging(): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + +def test_jet_tagging_quartus(): + # download test model + dl_dir = "./" + dl_file = dl_dir + "qkeras_jettagging.onnx" + jet_tagging_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/JetTagging/QKeras_hls4ml_3layer/qkeras_jettagging.onnx" + ) + urllib.request.urlretrieve(jet_tagging_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_file = dl_dir + "/qkeras_jettagging-clean.onnx" + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1,16) + np.random.seed(0) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + + hls_model = hls4ml.converters.convert_from_onnx_model(model, + output_dir='hls4mlprj_qonnx_jettag_quartus', + part='Arria10', + backend='Quartus', + hls_config=config) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + if __name__ == '__main__': test_tfc_2w2a_quartus() From e00cfba9e6458f4ee7426a21759a568e7c2cbc72 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 1 Jun 2022 12:19:51 -0500 Subject: [PATCH 26/51] Some code cleanup --- .../optimizer/passes/matmul_const_to_dense.py | 2 +- test/pytest/test_qonnx.py | 46 ++----------------- 2 files changed, 5 insertions(+), 43 deletions(-) diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 43f5c14f4f..98b77f2147 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -1,6 +1,6 @@ import numpy as np from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import IntegerPrecisionType, NamedType, FixedPrecisionType +from hls4ml.model.types import IntegerPrecisionType from hls4ml.model.layers import MatMul, Constant, Dense _base_attributes = ('Trace', 'reuse_factor', 'weight', 'weight_t', 'bias', 'bias_t') diff --git 
a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index a3ec7d053c..9319ecd93d 100755 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -133,7 +133,8 @@ def test_cnv_2w2a(): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) -def test_jet_tagging(): +@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) +def test_jet_tagging(backend): # download test model dl_dir = "./" dl_file = dl_dir + "qkeras_jettagging.onnx" @@ -163,8 +164,8 @@ def test_jet_tagging(): # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation hls_model = hls4ml.converters.convert_from_onnx_model(model, - output_dir='hls4mlprj_qonnx_jettag', - part='xcu250-figd2104-2L-e', + output_dir=f'hls4mlprj_qonnx_jettag_{backend}', + backend=backend, hls_config=config) hls_model.compile() y_hls4ml = hls_model.predict(X) @@ -172,44 +173,5 @@ def test_jet_tagging(): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) -def test_jet_tagging_quartus(): - # download test model - dl_dir = "./" - dl_file = dl_dir + "qkeras_jettagging.onnx" - jet_tagging_qonnx_url = ( - "https://raw.githubusercontent.com/fastmachinelearning/" - "QONNX_model_zoo/main/models/JetTagging/QKeras_hls4ml_3layer/qkeras_jettagging.onnx" - ) - urllib.request.urlretrieve(jet_tagging_qonnx_url, dl_file) - assert os.path.isfile(dl_file) - out_file = dl_dir + "/qkeras_jettagging-clean.onnx" - - # cleanup - qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) - model = ModelWrapper(out_file) - - # Execute QONNX model inference - # TODO make the test bigger - ishape = (1,16) - np.random.seed(0) - X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) - idict = {model.graph.input[0].name: X} - y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] - - # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model) - # Some hand-derived config - # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation - - hls_model = hls4ml.converters.convert_from_onnx_model(model, - output_dir='hls4mlprj_qonnx_jettag_quartus', - part='Arria10', - backend='Quartus', - hls_config=config) - hls_model.compile() - y_hls4ml = hls_model.predict(X) - - np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) - if __name__ == '__main__': test_tfc_2w2a_quartus() From 6d6e81b8481359318abf44c5585360f6e17ea11a Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 1 Jun 2022 18:58:38 -0500 Subject: [PATCH 27/51] reshape and transpose constant fusion --- hls4ml/model/optimizer/__init__.py | 2 ++ .../model/optimizer/passes/reshape_const.py | 36 ++++++++++++++++--- .../model/optimizer/passes/transpose_opt.py | 35 +++++++++++++++--- 3 files changed, 65 insertions(+), 8 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 28d157253d..41c52851ac 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -20,6 +20,8 @@ 'quant_constant_parameters', 'quant_to_activation', 'fuse_quant_with_constant', + 'reshape_constant_fusion', + 'transpose_constant_fusion', 'quant_to_alpha_activation_alpha', 'const_quant_to_const_alpha', 'batch_norm_onnx_constant_parameters', diff --git a/hls4ml/model/optimizer/passes/reshape_const.py b/hls4ml/model/optimizer/passes/reshape_const.py index 3e4ef82a1b..71aab4e4ec 100644 --- a/hls4ml/model/optimizer/passes/reshape_const.py +++ 
b/hls4ml/model/optimizer/passes/reshape_const.py @@ -10,14 +10,42 @@ def match(self, node): and node.get_input_node(node.inputs[1])) return is_match - + def transform(self, model, node): """ Remove Constant from new shape input. Note, input shape node is already used on initialize """ shape_node = node.get_input_node(node.inputs[1]) + node.inputs[1] = '' if not isinstance(shape_node, Constant): - raise "Nonconstant shape inputs are not currently suppoerted" + raise "Nonconstant shape inputs are not currently supported" model.remove_node(shape_node, rewire=False) - - return True \ No newline at end of file + + return True + +class ReshapeConstantFusion(OptimizerPass): + """ Remove Constant from new shape input """ + def match(self, node): + is_match = (isinstance(node, Reshape) + and len(node.inputs) >= 0 + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and (len(node.inputs) == 1 + or not node.get_input_node(node.inputs[1]))) + + return is_match + + def transform(self, model, node): + """ + Change the shape of the constant + """ + const_node = node.get_input_node(node.inputs[0]) + target_shape = node.get_attr('target_shape') + new_val = np.reshape(const_node.value, target_shape) + const_node.set_attr('value', new_val) + const_node.value = new_val + dims = [f'{const_node.name}_{i}' for i in range(len(target_shape))] + self.add_output_variable(target_shape, dims, var_name=const_node.name, + precision=const_node.get_attr("precision")) + + model.remove_node(node, rewire=True) + return True diff --git a/hls4ml/model/optimizer/passes/transpose_opt.py b/hls4ml/model/optimizer/passes/transpose_opt.py index b3ad9baa7f..021684be2b 100644 --- a/hls4ml/model/optimizer/passes/transpose_opt.py +++ b/hls4ml/model/optimizer/passes/transpose_opt.py @@ -1,12 +1,13 @@ +import numpy as np from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.layers import Transpose +from hls4ml.model.layers import Transpose, Constant class RemoveUselessTranspose(OptimizerPass): def match(self, node): is_match = isinstance(node, Transpose) and\ - node.get_attr('perm') == [0] #Useless transpose + list(node.get_attr('perm')) == [0] #Useless transpose return is_match - + def transform(self, model, node): """ Remove a transpose layer if it doesn't do anything. 
i.e 1D input and perm = [0] @@ -17,5 +18,31 @@ def transform(self, model, node): model.remove_node(node, rewire=False) #Don't rewire if there is no output layer else: model.remove_node(node, rewire=True) - + + return True + +class TransposeConstantFusion(OptimizerPass): + """ Remove Constant from new shape input """ + def match(self, node): + is_match = (isinstance(node, Transpose) + and len(node.input) >= 0 + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and list(node.get_attr('perm')) != [0]) + + return is_match + + def transform(self, model, node): + """ + Change the shape of the constant + """ + const_node = node.get_input_node(node.inputs[0]) + perm = node.get_attr('perm') + new_val = np.transpose(const_node.value, perm) + const_node.set_attr('value', new_val) + const_node.value = new_val + dims = [f'{const_node.name}_{i}' for i in range(len(perm))] + self.add_output_variable(new_val.shape, dims, var_name=const_node.name, + precision=const_node.get_attr("precision")) + + model.remove_node(node, rewire=True) return True \ No newline at end of file From 4911f715cd0dc766d0e2fe9fc121dbd55dedd01b Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 2 Jun 2022 18:23:00 -0500 Subject: [PATCH 28/51] update reshape test to include Quartus --- test/pytest/test_reshape.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/pytest/test_reshape.py b/test/pytest/test_reshape.py index 505f225547..c8f8c2c6d0 100755 --- a/test/pytest/test_reshape.py +++ b/test/pytest/test_reshape.py @@ -8,8 +8,8 @@ from tensorflow.keras import optimizers from tensorflow.keras.layers import Input, Dense, Reshape, Softmax - -def test_reshape_parallel(): +@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) +def test_reshape_parallel(backend): model = tf.keras.models.Sequential([ tf.keras.layers.Input((10)), tf.keras.layers.Dense(10*3), @@ -18,8 +18,10 @@ def test_reshape_parallel(): ]) model.compile(optimizer='adam', loss='mse') config = hls4ml.utils.config_from_keras_model(model) - output_dir = 'hls4mlprj_reshape_parallel' - hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir) + output_dir = f'hls4mlprj_reshape_parallel_{backend}' + hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, + output_dir=output_dir, + backend=backend) hls_model.compile() def test_reshape_stream(): From 9535f16621bb63d3ffbb10728d0f5373322c9162 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 2 Jun 2022 14:22:39 -0500 Subject: [PATCH 29/51] remove cppname --- hls4ml/backends/fpga/fpga_types.py | 10 +++--- hls4ml/model/types.py | 1 - hls4ml/writer/quartus_writer.py | 12 +++---- hls4ml/writer/vivado_accelerator_writer.py | 20 ++++++------ hls4ml/writer/vivado_writer.py | 38 +++++++++++----------- 5 files changed, 40 insertions(+), 41 deletions(-) diff --git a/hls4ml/backends/fpga/fpga_types.py b/hls4ml/backends/fpga/fpga_types.py index e6bf796c98..ef14f961db 100644 --- a/hls4ml/backends/fpga/fpga_types.py +++ b/hls4ml/backends/fpga/fpga_types.py @@ -184,11 +184,11 @@ def definition_cpp(self, name_suffix='', as_reference=False): class VivadoArrayVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - return '{type} {name}{suffix}[{shape}]'.format(type=self.type.name, name=self.cppname, suffix=name_suffix, shape=self.size_cpp()) + return '{type} {name}{suffix}[{shape}]'.format(type=self.type.name, name=self.name, suffix=name_suffix, shape=self.size_cpp()) 
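        # For illustration with hypothetical values: a Vivado array variable named 'layer2_out'
        # of type 'layer2_t' whose size_cpp() string is '64' is rendered by the definition above
        # as the C++ declaration 'layer2_t layer2_out[64]' (with an empty name_suffix).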
class QuartusArrayVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - return '{type} {name}{suffix}[{shape}] {pragma}'.format(type=self.type.name, name=self.cppname, suffix=name_suffix, shape=self.size_cpp(), pragma=self.pragma) + return '{type} {name}{suffix}[{shape}] {pragma}'.format(type=self.type.name, name=self.name, suffix=name_suffix, shape=self.size_cpp(), pragma=self.pragma) class VivadoInplaceArrayVariableDefinition(VariableDefinition): def definition_cpp(self): @@ -270,9 +270,9 @@ def __init__(self, type_converter): class VivadoStreamVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): if as_reference: # Function parameter - return 'hls::stream<{type}> &{name}{suffix}'.format(type=self.type.name, name=self.cppname, suffix=name_suffix) + return 'hls::stream<{type}> &{name}{suffix}'.format(type=self.type.name, name=self.name, suffix=name_suffix) else: # Declaration - return 'hls::stream<{type}> {name}{suffix}("{name}")'.format(type=self.type.name, name=self.cppname, suffix=name_suffix) + return 'hls::stream<{type}> {name}{suffix}("{name}")'.format(type=self.type.name, name=self.name, suffix=name_suffix) class VivadoInplaceStreamVariableDefinition(VariableDefinition): def definition_cpp(self): @@ -321,7 +321,7 @@ def __init__(self, type_converter): class StaticWeightVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - return '{type} {name}[{size}]'.format(type=self.type.name, name=self.cppname, size=self.data_length) + return '{type} {name}[{size}]'.format(type=self.type.name, name=self.name, size=self.data_length) class StaticWeightVariableConverter(object): def __init__(self, type_converter): diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index a42903ff16..a78b4fbc9f 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -186,7 +186,6 @@ class Variable(object): def __init__(self, var_name, atype, **kwargs): self.name = var_name.format(**kwargs) self.type = atype - self.cppname = re.sub(r'\W|^(?=\d)','_', self.name) class TensorVariable(Variable): def __init__(self, shape, dim_names, var_name='layer{index}', type_name='layer{index}_t', precision=None, **kwargs): diff --git a/hls4ml/writer/quartus_writer.py b/hls4ml/writer/quartus_writer.py index f8c35eea05..ea78bde095 100644 --- a/hls4ml/writer/quartus_writer.py +++ b/hls4ml/writer/quartus_writer.py @@ -357,10 +357,10 @@ def write_bridge(self, model): elif '//hls-fpga-machine-learning insert header' in line: dtype = line.split('#', 1)[1].strip() inputs_str = ', '.join( - ['{type} {name}[{shape}]'.format(type=dtype, name=i.cppname, shape=i.size_cpp()) for i in + ['{type} {name}[{shape}]'.format(type=dtype, name=i.member_name, shape=i.size_cpp()) for i in model_inputs]) outputs_str = ', '.join( - ['{type} {name}[{shape}]'.format(type=dtype, name=o.cppname, shape=o.size_cpp()) for o in + ['{type} {name}[{shape}]'.format(type=dtype, name=o.member_name, shape=o.size_cpp()) for o in model_outputs]) insize_str = ', '.join( ['unsigned short &const_size_in_{}'.format(i) for i in range(1, len(model_inputs) + 1)]) @@ -380,8 +380,8 @@ def write_bridge(self, model): for i in model_inputs: newline += indent + 'nnet::convert_data<{}, {}, {}>({}, inputs_ap.{});\n'.format(dtype, i.type.name, i.size_cpp(), - i.cppname, - i.cppname) + i.member_name, + i.member_name) newline += '\n' newline += indent + 'output_data outputs_ap;\n' @@ -393,8 +393,8 @@ def write_bridge(self, 
model): newline += indent + 'nnet::convert_data_back<{}, {}, {}>(outputs_ap.{}, {});\n'.format(o.type.name, dtype, o.size_cpp(), - o.cppname, - o.cppname) + o.member_name, + o.member_name) elif '//hls-fpga-machine-learning insert trace_outputs' in line: newline = '' for layer in model.get_layers(): diff --git a/hls4ml/writer/vivado_accelerator_writer.py b/hls4ml/writer/vivado_accelerator_writer.py index 8d726c3e22..88ec4c3d6a 100644 --- a/hls4ml/writer/vivado_accelerator_writer.py +++ b/hls4ml/writer/vivado_accelerator_writer.py @@ -251,11 +251,11 @@ def write_wrapper_test(self, model): elif '{}('.format(model.config.get_project_name()) in line: indent_amount = line.split(model.config.get_project_name())[0] newline = indent_amount + '{}_axi(inputs,outputs);\n'.format(model.config.get_project_name()) - elif inp.size_cpp() in line or inp.cppname in line or inp.type.name in line: - newline = line.replace(inp.size_cpp(), 'N_IN').replace(inp.cppname, 'inputs').replace(inp.type.name, + elif inp.size_cpp() in line or inp.name in line or inp.type.name in line: + newline = line.replace(inp.size_cpp(), 'N_IN').replace(inp.name, 'inputs').replace(inp.type.name, 'input_axi_t') - elif out.size_cpp() in line or out.cppname in line or out.type.name in line: - newline = line.replace(out.size_cpp(), 'N_OUT').replace(out.cppname, 'outputs').replace(out.type.name, + elif out.size_cpp() in line or out.name in line or out.type.name in line: + newline = line.replace(out.size_cpp(), 'N_OUT').replace(out.name, 'outputs').replace(out.type.name, 'output_axi_t') else: newline = line @@ -289,17 +289,17 @@ def write_wrapper_test(self, model): '{}_axi.h'.format(model.config.get_project_name())) elif inp.definition_cpp(name_suffix='_ap') in line: newline = line.replace(inp.definition_cpp(name_suffix='_ap'), - 'input_axi_t {}_ap[N_IN]'.format(inp.cppname)) + 'input_axi_t {}_ap[N_IN]'.format(inp.name)) elif out.definition_cpp(name_suffix='_ap') in line: newline = line.replace(out.definition_cpp(name_suffix='_ap'), - 'output_axi_t {}_ap[N_OUT]'.format(out.cppname)) + 'output_axi_t {}_ap[N_OUT]'.format(out.name)) elif '{}('.format(model.config.get_project_name()) in line: indent_amount = line.split(model.config.get_project_name())[0] - newline = indent_amount + '{}_axi({}_ap,{}_ap);\n'.format(model.config.get_project_name(), inp.cppname, - out.cppname) - elif inp.size_cpp() in line or inp.cppname in line or inp.type.name in line: + newline = indent_amount + '{}_axi({}_ap,{}_ap);\n'.format(model.config.get_project_name(), inp.name, + out.name) + elif inp.size_cpp() in line or inp.name in line or inp.type.name in line: newline = line.replace(inp.size_cpp(), 'N_IN').replace(inp.type.name, 'input_axi_t') - elif out.size_cpp() in line or out.cppname in line or out.type.name in line: + elif out.size_cpp() in line or out.name in line or out.type.name in line: newline = line.replace(out.size_cpp(), 'N_OUT').replace(out.type.name, 'output_axi_t') else: newline = line diff --git a/hls4ml/writer/vivado_writer.py b/hls4ml/writer/vivado_writer.py index 2c1224c682..9b631f2fab 100644 --- a/hls4ml/writer/vivado_writer.py +++ b/hls4ml/writer/vivado_writer.py @@ -142,9 +142,9 @@ def write_project_cpp(self, model): #Add input/output type elif '//hls-fpga-machine-learning insert IO' in line: newline = line - all_inputs = [i.cppname for i in model_inputs] - all_outputs = [o.cppname for o in model_outputs] - all_brams = [b.cppname for b in model_brams] + all_inputs = [i.name for i in model_inputs] + all_outputs = [o.name for o in model_outputs] 
+ all_brams = [b.name for b in model_brams] io_type = model.config.get_config_value("IOType") if io_type == 'io_parallel': @@ -383,13 +383,13 @@ def write_test_bench(self, model): elif '//hls-fpga-machine-learning insert bram' in line: newline = line for bram in model_brams: - newline += '#include \"firmware/weights/{}.h\"\n'.format(bram.cppname) + newline += '#include \"firmware/weights/{}.h\"\n'.format(bram.name) elif '//hls-fpga-machine-learning insert data' in line: newline = line offset = 0 for inp in model_inputs: newline += ' ' + inp.definition_cpp() + ';\n' - newline += ' nnet::copy_data(in, {});\n'.format(inp.type.name, offset, inp.size_cpp(), inp.cppname) + newline += ' nnet::copy_data(in, {});\n'.format(inp.type.name, offset, inp.size_cpp(), inp.name) offset += inp.size() for out in model_outputs: newline += ' ' + out.definition_cpp() + ';\n' @@ -397,7 +397,7 @@ def write_test_bench(self, model): newline = line for inp in model_inputs: newline += ' ' + inp.definition_cpp() + ';\n' - newline += ' nnet::fill_zero<{}, {}>({});\n'.format(inp.type.name, inp.size_cpp(), inp.cppname) + newline += ' nnet::fill_zero<{}, {}>({});\n'.format(inp.type.name, inp.size_cpp(), inp.name) for out in model_outputs: newline += ' ' + out.definition_cpp() + ';\n' elif '//hls-fpga-machine-learning insert top-level-function' in line: @@ -408,9 +408,9 @@ def write_test_bench(self, model): output_size_vars = ','.join(['size_out{}'.format(o) for o in range(1, len(model_outputs) + 1)]) newline += size_str.format(input_size_vars, output_size_vars) - input_vars = ','.join([i.cppname for i in model_inputs]) - output_vars = ','.join([o.cppname for o in model_outputs]) - bram_vars =','.join([b.cppname for b in model_brams]) + input_vars = ','.join([i.name for i in model_inputs]) + output_vars = ','.join([o.name for o in model_outputs]) + bram_vars =','.join([b.name for b in model_brams]) # Concatenate the input, output, and bram variables. 
Filter out empty/null values all_vars = ','.join(filter(None, [input_vars, output_vars, bram_vars])) @@ -428,11 +428,11 @@ def write_test_bench(self, model): elif '//hls-fpga-machine-learning insert tb-output' in line: newline = line for out in model_outputs: - newline += indent + 'nnet::print_result<{}, {}>({}, fout);\n'.format(out.type.name, out.size_cpp(), out.cppname) #TODO enable this + newline += indent + 'nnet::print_result<{}, {}>({}, fout);\n'.format(out.type.name, out.size_cpp(), out.name) #TODO enable this elif '//hls-fpga-machine-learning insert output' in line or '//hls-fpga-machine-learning insert quantized' in line: newline = line for out in model_outputs: - newline += indent + 'nnet::print_result<{}, {}>({}, std::cout, true);\n'.format(out.type.name, out.size_cpp(), out.cppname) + newline += indent + 'nnet::print_result<{}, {}>({}, std::cout, true);\n'.format(out.type.name, out.size_cpp(), out.name) else: newline = line fout.write(newline) @@ -463,11 +463,11 @@ def write_bridge(self, model): elif '//hls-fpga-machine-learning insert bram' in line: newline = line for bram in model_brams: - newline += '#include \"firmware/weights/{}.h\"\n'.format(bram.cppname) + newline += '#include \"firmware/weights/{}.h\"\n'.format(bram.name) elif '//hls-fpga-machine-learning insert header' in line: dtype = line.split('#', 1)[1].strip() - inputs_str = ', '.join(['{type} {name}[{shape}]'.format(type=dtype, name=i.cppname, shape=i.size_cpp()) for i in model_inputs]) - outputs_str = ', '.join(['{type} {name}[{shape}]'.format(type=dtype, name=o.cppname, shape=o.size_cpp()) for o in model_outputs]) + inputs_str = ', '.join(['{type} {name}[{shape}]'.format(type=dtype, name=i.name, shape=i.size_cpp()) for i in model_inputs]) + outputs_str = ', '.join(['{type} {name}[{shape}]'.format(type=dtype, name=o.name, shape=o.size_cpp()) for o in model_outputs]) insize_str = ', '.join(['unsigned short &const_size_in_{}'.format(i) for i in range(1, len(model_inputs) + 1)]) outsize_str = ', '.join(['unsigned short &const_size_out_{}'.format(o) for o in range(1, len(model_outputs) + 1)]) @@ -481,7 +481,7 @@ def write_bridge(self, model): newline = '' for i in model_inputs: newline += indent + '{var};\n'.format(var=i.definition_cpp(name_suffix='_ap')) - newline += indent + 'nnet::convert_data<{}, {}, {}>({}, {}_ap);\n'.format(dtype, i.type.name, i.size_cpp(), i.cppname, i.cppname) + newline += indent + 'nnet::convert_data<{}, {}, {}>({}, {}_ap);\n'.format(dtype, i.type.name, i.size_cpp(), i.name, i.name) newline += '\n' for o in model_outputs: @@ -491,9 +491,9 @@ def write_bridge(self, model): input_size_vars = ','.join(['const_size_in_{}'.format(i) for i in range(1, len(model_inputs) + 1)]) output_size_vars = ','.join(['const_size_out_{}'.format(o) for o in range(1, len(model_outputs) + 1)]) - input_vars = ','.join([i.cppname + '_ap' for i in model_inputs]) - bram_vars =','.join([b.cppname for b in model_brams]) - output_vars = ','.join([o.cppname + '_ap' for o in model_outputs]) + input_vars = ','.join([i.name + '_ap' for i in model_inputs]) + bram_vars =','.join([b.name for b in model_brams]) + output_vars = ','.join([o.name + '_ap' for o in model_outputs]) # Concatenate the input, output, and bram variables. 
Filter out empty/null values all_vars = ','.join(filter(None, [input_vars, output_vars, bram_vars])) @@ -504,7 +504,7 @@ def write_bridge(self, model): newline += '\n' for o in model_outputs: - newline += indent + 'nnet::convert_data<{}, {}, {}>({}_ap, {});\n'.format(o.type.name, dtype, o.size_cpp(), o.cppname, o.cppname) + newline += indent + 'nnet::convert_data<{}, {}, {}>({}_ap, {});\n'.format(o.type.name, dtype, o.size_cpp(), o.name, o.name) elif '//hls-fpga-machine-learning insert trace_outputs' in line: newline = '' for layer in model.get_layers(): From efe4f79d893e33e3cb7add37103daedb3dd50934 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 3 Jun 2022 15:21:21 -0500 Subject: [PATCH 30/51] remove cppname from InplaceVariable stuff --- hls4ml/backends/fpga/fpga_types.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hls4ml/backends/fpga/fpga_types.py b/hls4ml/backends/fpga/fpga_types.py index ef14f961db..0228538bb1 100644 --- a/hls4ml/backends/fpga/fpga_types.py +++ b/hls4ml/backends/fpga/fpga_types.py @@ -192,11 +192,11 @@ def definition_cpp(self, name_suffix='', as_reference=False): class VivadoInplaceArrayVariableDefinition(VariableDefinition): def definition_cpp(self): - return f'auto& {self.cppname} = {self.input_var.cppname}' + return f'auto& {self.name} = {self.input_var.name}' class QuartusInplaceArrayVariableDefinition(VariableDefinition): def definition_cpp(self): - return f'auto& {self.cppname} = {self.input_var.cppname}' + return f'auto& {self.name} = {self.input_var.name}' class ArrayVariableConverter(object): def __init__(self, type_converter, prefix, definition_cls): @@ -276,7 +276,7 @@ def definition_cpp(self, name_suffix='', as_reference=False): class VivadoInplaceStreamVariableDefinition(VariableDefinition): def definition_cpp(self): - return f'auto& {self.cppname} = {self.input_var.cppname}' + return f'auto& {self.name} = {self.input_var.name}' class StreamVariableConverter(object): def __init__(self, type_converter, prefix, definition_cls): From 166da8bbd0108c3e6fba4859796d76be6effdd0d Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 24 Jun 2022 10:37:54 -0500 Subject: [PATCH 31/51] partial first attempt to add tracing to quartus backend --- .../firmware/nnet_utils/nnet_helpers.h | 44 +++++++++++++++++++ hls4ml/writer/quartus_writer.py | 6 +++ 2 files changed, 50 insertions(+) diff --git a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_helpers.h b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_helpers.h index 1027e8fb00..20e39106c6 100755 --- a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_helpers.h +++ b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_helpers.h @@ -26,6 +26,7 @@ #include #include #include +#include namespace nnet { @@ -58,6 +59,49 @@ constexpr int pow2(int x){ return x == 0 ? 
1 : 2 * pow2(x - 1); } +extern bool trace_enabled; +extern std::map<std::string, void *> *trace_outputs; +extern size_t trace_type_size; + +template<class data_T, class save_T> +void save_output_array(data_T *data, save_T *ptr, size_t layer_size) { + for(int i = 0; i < layer_size; i++) { + ptr[i] = static_cast<save_T>(data[i].to_double()); + } +} + +// We don't want to include save_T in this function because it will be inserted into myproject.cpp +// so a workaround with element size is used +template<class data_T> +void save_layer_output(data_T *data, const char *layer_name, size_t layer_size) { + if (!trace_enabled) return; + + if (trace_outputs) { + if (trace_outputs->count(layer_name) > 0) { + if (trace_type_size == 4) { + save_output_array<data_T, float>(data, (float *) (*trace_outputs)[layer_name], layer_size); + } else if (trace_type_size == 8) { + save_output_array<data_T, double>(data, (double *) (*trace_outputs)[layer_name], layer_size); + } else { + std::cout << "Unknown trace type!" << std::endl; + } + } else { + std::cout << "Layer name: " << layer_name << " not found in debug storage!" << std::endl; + } + } else { + std::ostringstream filename; + filename << "./tb_data/" << layer_name << "_output.log"; //TODO if run as a shared lib, path should be ../tb_data + std::fstream out; + out.open(filename.str(), std::ios::app); + assert(out.is_open()); + for(int i = 0; i < layer_size; i++) { + out << data[i] << " "; // We don't care about precision in text files + } + out << std::endl; + out.close(); + } +} + } #endif diff --git a/hls4ml/writer/quartus_writer.py b/hls4ml/writer/quartus_writer.py index f659d7639a..140bea7bbe 100644 --- a/hls4ml/writer/quartus_writer.py +++ b/hls4ml/writer/quartus_writer.py @@ -129,6 +129,12 @@ def write_project_cpp(self, model): if func: newline += ' ' + func + '\n' newline += '\n' + if model.config.trace_output and layer.get_attr('Trace', False): + newline += '#ifndef HLS_SYNTHESIS\n' + for var in vars: + newline += ' nnet::save_layer_output<{}>({}, "{}", {});\n'.format(var.type.name, var.name, layer.name, var.size_cpp()) + newline += '#endif\n' + newline += '\n' # Just copy line else: From e98cd71c8b0395c9b347d1a8a70cb9e8b018089b Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 24 Jun 2022 17:05:33 -0500 Subject: [PATCH 32/51] continue adding tracing for quartus --- hls4ml/templates/quartus/firmware/nnet_utils/nnet_helpers.h | 1 + hls4ml/writer/quartus_writer.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_helpers.h b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_helpers.h index 20e39106c6..e0e2a7b8dd 100755 --- a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_helpers.h +++ b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_helpers.h @@ -27,6 +27,7 @@ #include #include #include +#include namespace nnet { diff --git a/hls4ml/writer/quartus_writer.py b/hls4ml/writer/quartus_writer.py index 140bea7bbe..173c0cd264 100644 --- a/hls4ml/writer/quartus_writer.py +++ b/hls4ml/writer/quartus_writer.py @@ -406,8 +406,7 @@ def write_bridge(self, model): newline = '' for layer in model.get_layers(): func = layer.get_attr('function_cpp') - if func and model.config.trace_output and model.config.get_layer_config_value(layer, 'Trace', - False): + if func and model.config.trace_output and layer.get_attr('Trace', False): vars = layer.get_variables() for var in vars: newline += indent + 'nnet::trace_outputs->insert(std::pair<std::string, void *>("{}", (void *) malloc({} * element_size)));\n'.format( From 8674a8aaa3381c9671b30d64f1d9566aa870aa22 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: 
Fri, 24 Jun 2022 17:57:42 -0500 Subject: [PATCH 33/51] Add trace pytest, fix bug uncovered in pytest --- hls4ml/writer/quartus_writer.py | 11 ++++---- test/pytest/test_trace.py | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 6 deletions(-) create mode 100644 test/pytest/test_trace.py diff --git a/hls4ml/writer/quartus_writer.py b/hls4ml/writer/quartus_writer.py index 173c0cd264..7c39825fd2 100644 --- a/hls4ml/writer/quartus_writer.py +++ b/hls4ml/writer/quartus_writer.py @@ -128,13 +128,12 @@ def write_project_cpp(self, model): func = layer.get_attr('function_cpp', None) if func: newline += ' ' + func + '\n' + if model.config.trace_output and layer.get_attr('Trace', False): + newline += '#ifndef HLS_SYNTHESIS\n' + for var in vars: + newline += ' nnet::save_layer_output<{}>({}, "{}", {});\n'.format(var.type.name, var.name, layer.name, var.size_cpp()) + newline += '#endif\n' newline += '\n' - if model.config.trace_output and layer.get_attr('Trace', False): - newline += '#ifndef HLS_SYNTHESIS\n' - for var in vars: - newline += ' nnet::save_layer_output<{}>({}, "{}", {});\n'.format(var.type.name, var.name, layer.name, var.size_cpp()) - newline += '#endif\n' - newline += '\n' # Just copy line else: diff --git a/test/pytest/test_trace.py b/test/pytest/test_trace.py new file mode 100644 index 0000000000..6fbad275b0 --- /dev/null +++ b/test/pytest/test_trace.py @@ -0,0 +1,45 @@ +import pytest +import hls4ml +import tensorflow as tf +import numpy as np +from pathlib import Path +from tensorflow.keras.layers import Dense, Activation + +test_root_path = Path(__file__).parent + +@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) +def test_trace(backend): + model = tf.keras.models.Sequential() + model.add(Dense(2, + input_shape=(1,), + name='Dense', + use_bias=True, + kernel_initializer= tf.keras.initializers.RandomUniform(minval=1, maxval=10), + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None)) + model.add(Activation(activation='elu', name='Activation')) + model.compile(optimizer='adam', loss='mse') + + X_input = np.random.rand(100,1) + + keras_prediction = model.predict(X_input) + + config = hls4ml.utils.config_from_keras_model(model, granularity='name') + for layer in config['LayerName'].keys(): + config['LayerName'][layer]['Trace'] = True + + output_dir = str(test_root_path / f'hls4mlprj_trace_{backend}') + + hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir, backend=backend) + + hls_model.compile() + hls4ml_pred, hls4ml_trace = hls_model.trace(X_input) + keras_trace = hls4ml.model.profiling.get_ymodel_keras(model, X_input) + + np.testing.assert_allclose(hls4ml_trace['Dense'], keras_trace['Dense'], rtol=1e-2, atol=0.01) + np.testing.assert_allclose(hls4ml_pred, keras_prediction, rtol=1e-2, atol=0.01) + From c2989662ff1a44e3bdfb53aa54d3abe10d571c98 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 24 Jun 2022 18:08:02 -0500 Subject: [PATCH 34/51] add docstring --- test/pytest/test_trace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/pytest/test_trace.py b/test/pytest/test_trace.py index 6fbad275b0..042ddc06b2 100644 --- a/test/pytest/test_trace.py +++ b/test/pytest/test_trace.py @@ -9,6 +9,7 @@ @pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) def test_trace(backend): + '''Test the tracing feature with a simple Keras model.''' model = tf.keras.models.Sequential() 
model.add(Dense(2, input_shape=(1,), @@ -42,4 +43,3 @@ def test_trace(backend): np.testing.assert_allclose(hls4ml_trace['Dense'], keras_trace['Dense'], rtol=1e-2, atol=0.01) np.testing.assert_allclose(hls4ml_pred, keras_prediction, rtol=1e-2, atol=0.01) - From fd2ef956f36418adb088c307bda1cf1ee10578b9 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 6 Jul 2022 15:30:43 -0500 Subject: [PATCH 35/51] move batchnorm broadcast to fpga_backend from vivado_backend --- hls4ml/backends/fpga/fpga_backend.py | 28 +++++++++++++++++++++--- hls4ml/backends/vivado/vivado_backend.py | 25 +-------------------- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py index 4cb38888a7..f29a642a38 100644 --- a/hls4ml/backends/fpga/fpga_backend.py +++ b/hls4ml/backends/fpga/fpga_backend.py @@ -7,12 +7,11 @@ import re from hls4ml.backends.backend import Backend -from hls4ml.model.layers import Layer +from hls4ml.model.layers import Layer, BatchNormalization from hls4ml.model.attributes import Attribute from hls4ml.model.types import IntegerPrecisionType, FixedPrecisionType, XnorPrecisionType, ExponentPrecisionType from hls4ml.writer import get_writer -from hls4ml.model.optimizer import model_optimizer - +from hls4ml.model.optimizer import layer_optimizer, model_optimizer class FPGABackend(Backend): def __init__(self, name): @@ -388,3 +387,26 @@ def compute_conv2d_instructions(self, in_H, in_W, in_C, kernel_size=3, stride=1, def write_hls(self, model): self.writer.write_hls(model) return True + + @layer_optimizer(BatchNormalization) + def init_batchnormalization(self, layer): + '''Broadcast weights and scale if needed''' + input_shape = layer.get_input_variable().shape + + scale = layer.weights['scale'].data_unquantized + bias = layer.weights['bias'].data_unquantized + + n_filt = layer.get_attr('n_filt', -1) + + scale_bias_shape = input_shape if n_filt == -1 else (n_filt,) + + # Check shape, broadcast if needed. Don't broadcast if a squeeze makes them match. 
+ if scale.shape != tuple(scale_bias_shape) and np.squeeze(scale).shape != tuple(scale_bias_shape): + layer.add_weights_variable(name='scale', data=np.broadcast_to(scale, scale_bias_shape), + precision=layer.get_attr("scale_precision"), + quantizer=layer.get_attr("scale_quantizer")) + + if bias.shape != tuple(scale_bias_shape) and np.squeeze(bias).shape != tuple(scale_bias_shape): + layer.add_weights_variable(name='bias', data=np.broadcast_to(bias, scale_bias_shape), + precision=layer.get_attr("bias_precision"), + quantizer=layer.get_attr("bias_quantizer")) diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index 7820160e27..bdac17440d 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -7,7 +7,7 @@ from collections.abc import Iterable from hls4ml.model.types import FixedPrecisionType, NamedType, IntegerPrecisionType -from hls4ml.model.layers import Layer, Dense, BatchNormalization, Embedding, Conv1D, Conv2D, Conv2DBatchnorm, SeparableConv1D, SeparableConv2D, DepthwiseConv2D, Activation, ParametrizedActivation, PReLU, Softmax, Pooling1D, Pooling2D, GlobalPooling1D, GlobalPooling2D, ZeroPadding1D, ZeroPadding2D, Merge, Concatenate, Dot, Resize, Transpose, SimpleRNN, LSTM, GRU, GarNet, GarNetStack +from hls4ml.model.layers import Layer, Dense, Embedding, Conv1D, Conv2D, Conv2DBatchnorm, SeparableConv1D, SeparableConv2D, DepthwiseConv2D, Activation, ParametrizedActivation, PReLU, Softmax, Pooling1D, Pooling2D, GlobalPooling1D, GlobalPooling2D, ZeroPadding1D, ZeroPadding2D, Merge, Concatenate, Dot, Resize, Transpose, SimpleRNN, LSTM, GRU, GarNet, GarNetStack from hls4ml.model.attributes import Attribute from hls4ml.model.optimizer import get_backend_passes, layer_optimizer, model_optimizer from hls4ml.model.flow import register_flow @@ -210,29 +210,6 @@ def init_depconv2d(self, layer): layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower()) - @layer_optimizer(BatchNormalization) - def init_batchnormalization(self, layer): - '''Broadcast weights and scale if needed''' - input_shape = layer.get_input_variable().shape - - scale = layer.weights['scale'].data_unquantized - bias = layer.weights['bias'].data_unquantized - - n_filt = layer.get_attr('n_filt', -1) - - scale_bias_shape = input_shape if n_filt == -1 else (n_filt,) - - # Check shape, broadcast if needed. Don't broadcast if a squeeze makes them match. - if scale.shape != tuple(scale_bias_shape) and np.squeeze(scale).shape != tuple(scale_bias_shape): - layer.add_weights_variable(name='scale', data=np.broadcast_to(scale, scale_bias_shape), - precision=layer.get_attr("scale_precision"), - quantizer=layer.get_attr("scale_quantizer")) - - if bias.shape != tuple(scale_bias_shape) and np.squeeze(bias).shape != tuple(scale_bias_shape): - layer.add_weights_variable(name='bias', data=np.broadcast_to(bias, scale_bias_shape), - precision=layer.get_attr("bias_precision"), - quantizer=layer.get_attr("bias_quantizer")) - @layer_optimizer(Activation) def init_activation(self, layer): From a65192b11a53ccb9103837af76ede8c3f75facff Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 6 Jul 2022 17:07:21 -0500 Subject: [PATCH 36/51] Revert "Work around apparent mac clang bug" This reverts commit 5ae179ac94c908056435f68cb27fa40ec6d48e60. 
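For illustration, a minimal standalone sketch of the shape check done by the init_batchnormalization optimizer above; the input shape, n_filt and scale values here are made-up for the example and are not taken from the patch.

    import numpy as np

    # made-up example: (10, 3) feature map with per-channel scale and n_filt unset
    input_shape = (10, 3)
    n_filt = -1
    scale = np.array([1.5, 2.0, 0.5])  # shape (3,), as loaded from the model

    scale_bias_shape = input_shape if n_filt == -1 else (n_filt,)
    # broadcast only if neither the raw nor the squeezed shape already matches
    if scale.shape != tuple(scale_bias_shape) and np.squeeze(scale).shape != tuple(scale_bias_shape):
        scale = np.broadcast_to(scale, scale_bias_shape)

    print(scale.shape)  # (10, 3) in this made-up case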
--- hls4ml/templates/quartus/build_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/templates/quartus/build_lib.sh b/hls4ml/templates/quartus/build_lib.sh index ace25ea1dc..5310170389 100755 --- a/hls4ml/templates/quartus/build_lib.sh +++ b/hls4ml/templates/quartus/build_lib.sh @@ -4,7 +4,7 @@ CC=g++ if [[ "$OSTYPE" == "linux-gnu" ]]; then CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" elif [[ "$OSTYPE" == "darwin"* ]]; then - CFLAGS="-O -fPIC -std=c++11" + CFLAGS="-O3 -fPIC -std=c++11" fi LDFLAGS= INCFLAGS="-Ifirmware/ac_types/ -Ifirmware/ap_types/" From 9fca924a6c31fbe5d1f15af30e242d067c793bfb Mon Sep 17 00:00:00 2001 From: Sioni Summers Date: Fri, 8 Jul 2022 14:19:55 +0200 Subject: [PATCH 37/51] Update test image and qonnx test --- test/pytest/ci-template.yml | 4 ++-- test/pytest/test_qonnx.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/pytest/ci-template.yml b/test/pytest/ci-template.yml index 34a608c67f..45066e7b80 100644 --- a/test/pytest/ci-template.yml +++ b/test/pytest/ci-template.yml @@ -1,6 +1,6 @@ .pytest: stage: test - image: gitlab-registry.cern.ch/fastmachinelearning/hls4ml-testing:0.2.base + image: gitlab-registry.cern.ch/fastmachinelearning/hls4ml-testing:0.3b0.base tags: - docker before_script: @@ -20,4 +20,4 @@ coverage_format: cobertura path: test/pytest/coverage.xml paths: - - test/pytest/hls4mlprj*.tar.gz \ No newline at end of file + - test/pytest/hls4mlprj*.tar.gz diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 9319ecd93d..e20a69c967 100755 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -7,8 +7,8 @@ import urllib import os # To conveniently run QONNX inference -from finn.core.modelwrapper import ModelWrapper -import finn.core.onnx_exec as oxe +from qonnx.core.modelwrapper import ModelWrapper +import qonnx.core.onnx_exec as oxe def test_tfc_2w2a(): # download test model From 5920d1d4ff49f4fb279d736ed2b73e38c54e7562 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 21 Jul 2022 18:24:23 -0500 Subject: [PATCH 38/51] fix extras for quartus_backend when we have optimization_passes --- hls4ml/backends/quartus/quartus_backend.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hls4ml/backends/quartus/quartus_backend.py b/hls4ml/backends/quartus/quartus_backend.py index c59a6df4cf..99ef1fa7d1 100644 --- a/hls4ml/backends/quartus/quartus_backend.py +++ b/hls4ml/backends/quartus/quartus_backend.py @@ -58,14 +58,13 @@ def _register_flows(self): 'make_stamp', 'quartus:write_hls' ] - writer_flow_requirements = ['optimize', quartus_types_flow, template_flow] - self._writer_flow = register_flow('write', writer_passes, requires=writer_flow_requirements, backend=self.name) + self._writer_flow = register_flow('write', writer_passes, requires=['quartus:ip'], backend=self.name) all_passes = get_backend_passes(self.name) extras = [ # Ideally this should be empty - opt_pass for opt_pass in all_passes if opt_pass not in initializers + quartus_types + templates + writer_passes + opt_pass for opt_pass in all_passes if opt_pass not in initializers + optimization_passes + quartus_types + templates + writer_passes ] if len(extras) > 0: From 95d4c41288f5aeea6626e83789c58695d67b6434 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 21 Jul 2022 18:30:24 -0500 Subject: [PATCH 39/51] Delete old and broken Gemm parsing: use qonnx package to convert to MatMul and Add --- hls4ml/converters/onnx/core.py | 18 ------------------ hls4ml/converters/onnx_to_hls.py | 
5 ----- 2 files changed, 23 deletions(-) diff --git a/hls4ml/converters/onnx/core.py b/hls4ml/converters/onnx/core.py index 2266e7f964..4fb7e667de 100644 --- a/hls4ml/converters/onnx/core.py +++ b/hls4ml/converters/onnx/core.py @@ -1,24 +1,6 @@ import numpy as np from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute -@onnx_handler(*['Gemm']) -def parse_gemm_layer(reader, node, inputs_map, input_shapes, graph, config): - - layer = {} - - layer['class_name'] = 'Dense' - layer['name'] = node.name - layer['inputs'] = node.input - layer['outputs'] = node.output - - tran_weight = get_onnx_attribute(node, 'transB', 0) - reader.add_input(layer['name'], node.input, tran_weight) - - weights_shape = input_shapes[1][:] - layer['n_in'] = weights_shape[0] - layer['n_out'] = weights_shape[1] - - return layer @onnx_handler('MatMul') def parse_matmul_layer(reader, node, inputs_map, input_shapes, graph, config): diff --git a/hls4ml/converters/onnx_to_hls.py b/hls4ml/converters/onnx_to_hls.py index 91da4d9d4d..01176febb9 100644 --- a/hls4ml/converters/onnx_to_hls.py +++ b/hls4ml/converters/onnx_to_hls.py @@ -66,11 +66,6 @@ def get_weights_data(self, layer_name, var_name): else: data = data.transpose() - #Check for transB in Gemm - if node.op_type == 'Gemm': - if not get_onnx_attribute(node, 'transB'): - data = data.transpose() - return data def add_input(self, layer_name, inputs, transpose=True, perm=None): From 951c2ceace48e7dbbe37c4f54e9af780ef1cf95a Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 10 Oct 2022 11:29:46 -0500 Subject: [PATCH 40/51] remove rounding and saturation modes from accumulator when precision calculated --- hls4ml/model/optimizer/passes/propagate_conv_precision.py | 7 ++++--- hls4ml/model/optimizer/passes/propagate_dense_precision.py | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/hls4ml/model/optimizer/passes/propagate_conv_precision.py b/hls4ml/model/optimizer/passes/propagate_conv_precision.py index 236a1d23c0..44e14b550a 100644 --- a/hls4ml/model/optimizer/passes/propagate_conv_precision.py +++ b/hls4ml/model/optimizer/passes/propagate_conv_precision.py @@ -49,9 +49,10 @@ def _propagate_type_conv(input_precision, weight_precision, bias_precision, bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(Nacc)) integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(Nacc)) signed = weight_precision.signed or input_precision.signed - # copy staruation and rounding from input - rounding_mode = input_precision.rounding_mode - saturation_mode = input_precision.saturation_mode + + # Because calculating precision, no need to round or sautration + rounding_mode = None + saturation_mode = None frac = bitwidth - integer diff --git a/hls4ml/model/optimizer/passes/propagate_dense_precision.py b/hls4ml/model/optimizer/passes/propagate_dense_precision.py index 381844e289..867da50d2b 100644 --- a/hls4ml/model/optimizer/passes/propagate_dense_precision.py +++ b/hls4ml/model/optimizer/passes/propagate_dense_precision.py @@ -45,9 +45,10 @@ def _propagate_type_dense(input_precision, weight_precision, bias_precision, num bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(num_acc)) integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(num_acc)) signed = weight_precision.signed or input_precision.signed - # copy staruation and rounding from "input_precision" - rounding_mode = input_precision.rounding_mode - saturation_mode = 
input_precision.saturation_mode + + # Because calculating precision, no need to round or sautration + rounding_mode = None + saturation_mode = None frac = bitwidth - integer From 2fe81d718cf3dcadeefdc3d935c81e85abe727ce Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 5 Jan 2023 20:45:36 -0600 Subject: [PATCH 41/51] fix parsing when flatten is the first layer after input --- hls4ml/converters/onnx_to_hls.py | 83 +++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 29 deletions(-) diff --git a/hls4ml/converters/onnx_to_hls.py b/hls4ml/converters/onnx_to_hls.py index 01176febb9..4810d01c7b 100644 --- a/hls4ml/converters/onnx_to_hls.py +++ b/hls4ml/converters/onnx_to_hls.py @@ -95,25 +95,55 @@ def get_onnx_attribute(operation, name, default=None): value = value.decode() return value -def get_input_shape(graph, operation, input_idx=None): - """ Return the input shapes of the model. If input_dx is not specified, then the full array is returned + +def get_global_input_shape(graph, inp): + """Return the global input shape of the graph with name inp + + Arguments: + graph: the onnx graph + inp: the global input name (str) + + Returns: + The shape (tuple) + + Raises: + StopIteration: If the global input name is not found """ - if input_idx is None: - rv = [] - for inp in operation.input: - value_info_idx = next((i for i, x in enumerate(graph.value_info) if x.name == inp), 0) - dim = [d.dim_value for d in graph.value_info[value_info_idx].type.tensor_type.shape.dim] - if dim: - rv.append(dim) - return rv - else: - value_info_idx = next((i for i, x in enumerate(graph.value_info) if x.name == operation.input[input_idx]), 0) - return [d.dim_value for d in graph.value_info[value_info_idx].type.tensor_type.shape.dim] + inp_shape = next((x.type.tensor_type.shape.dim for x in graph.input if x.name == inp)) + return tuple(x.dim_value for x in inp_shape) + + +def get_input_shape(graph, node): + """ Return the input shapes of the node in the model + + Arguments: + graph: the onnx graph + node: the node for which the input is desired + + Returns: + The shapes of all the inputs (list of tuples) + + Raises: + StopIteration: If the an input name is not found in the graph + """ + rv = [] + for inp in node.input: + try: + value_info_idx = next((i for i, x in enumerate(graph.value_info) if x.name == inp)) + dim = tuple(d.dim_value for d in graph.value_info[value_info_idx].type.tensor_type.shape.dim) + except StopIteration: + # The input is not in the graph, likely it's the input + dim = get_global_input_shape(graph, inp) + if dim: + rv.append(dim) + return rv + def get_constant_value(graph, constant_name): tensor = next((x for x in graph.initializer if x.name == constant_name), None) return numpy_helper.to_array(tensor) + def compute_pads_1d(operation, layer): auto_pad = get_onnx_attribute(operation, 'auto_pad', 'NOTSET') if auto_pad != 'NOTSET': @@ -135,6 +165,7 @@ def compute_pads_1d(operation, layer): return pads + def compute_pads_2d(operation, layer): auto_pad = get_onnx_attribute(operation, 'auto_pad', 'NOTSET') if auto_pad != 'NOTSET': @@ -193,24 +224,22 @@ def get_out_layer_name(graph): def onnx_to_hls(config): """ Convert onnx model to hls model from configuration. - Parameters - ---------- - config: dict - onnx configuration from yaml file or passed through API. + Args: + config: + onnx configuration (dict) from yaml file or passed through API. 
- Returns - ------- - hls_model : hls4ml model object + Returns: + hls_model: hls4ml model object """ - #This is a list of dictionaries to hold all the layer info we need to generate HLS + # This is a list of dictionaries to hold all the layer info we need to generate HLS layer_list = [] - #Extract model architecture + # Extract model architecture print('Interpreting Model ...') - model = onnx.load(config['OnnxModel']) if isinstance(config['OnnxModel'], str) else config['OnnxModel'] + model = onnx.load(config['OnnxModel']) if isinstance(config['OnnxModel'], str) else config['OnnxModel'] # # We don't infer the shapes because the QONNX preprocessing does it. We may want to add it back, # # however, if we want to support non-preprocessed ONNX @@ -231,14 +260,10 @@ def onnx_to_hls(config): input_layer = {} input_layer['name'] = replace_char_inconsitency(inp) input_layer['class_name'] = 'InputLayer' - inp_shape = next((x.type.tensor_type.shape.dim for x in model.graph.input if x.name == inp), None) - input_layer['input_shape'] = [x.dim_value for x in inp_shape] - - if len(input_layer['input_shape']) > 1: - input_layer['input_shape'][0] = None #First dim is batch + input_layer['input_shape'] = get_global_input_shape(model.graph, inp) print('Input shape:', input_layer['input_shape']) - #Clean the layer name for specific models + # Clean the layer name for specific models sanitize_layer_name(input_layer) input_layers[i] = input_layer['name'] From b9f9bec299c1edeea9399e1a00130ef9f2c7ed91 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 5 Jan 2023 20:46:58 -0600 Subject: [PATCH 42/51] fix fusing bn to Dense when the output name != node name --- hls4ml/model/optimizer/passes/bn_fuse.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index 0f9dc583d7..533e4567a8 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -36,7 +36,14 @@ def transform(self, model, node): parent_node = node.get_input_node() parent_map = parent_node.get_output_use_map() node_map = node.get_output_use_map() - if len(parent_map[parent_node.name]) > 1 or len(node_map[node.name]) > 1: + + if (len(parent_map.keys()) != 1 + or len(tuple(parent_map.values())[0]) != 1 + or len(node_map.keys()) != 1 + or len(tuple(node_map.values())[0]) > 1): + # This checks that output of both the parent and the current node + # is used at most one time for this optimzation. (For the parent, of course it can't be 0) + # JM: I understand the requirement on the parent, but not on the current node. 
return False # copying much of the logic from FuseConsecutiveBatchNormalization From 5195574250489c9e7fad710b4753b5a4808a03a9 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Jan 2023 15:10:59 -0600 Subject: [PATCH 43/51] fix expected rf issues --- hls4ml/backends/fpga/fpga_backend.py | 17 +++++- hls4ml/model/layers.py | 3 +- .../model/optimizer/passes/batchnorm_opt.py | 58 ++++++++++--------- 3 files changed, 50 insertions(+), 28 deletions(-) diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py index 185dae867c..5a29ecc172 100644 --- a/hls4ml/backends/fpga/fpga_backend.py +++ b/hls4ml/backends/fpga/fpga_backend.py @@ -13,6 +13,8 @@ LSTM, Activation, BatchNormalization, + BatchNormOnnx, + Conv, Conv1D, Conv2D, Dense, @@ -22,6 +24,8 @@ GarNetStack, GlobalPooling1D, GlobalPooling2D, + MatMul, + Merge, Pooling1D, Pooling2D, Quant, @@ -71,7 +75,18 @@ def __init__(self, name): attrs.append(TypeAttribute('accum')) self.attribute_map[layer] = attrs - rf_layers = accum_layers + [BatchNormalization, Activation, Embedding, GarNet, GarNetStack, Quant] + rf_layers = accum_layers + [ + BatchNormalization, + BatchNormOnnx, + Activation, + Embedding, + GarNet, + GarNetStack, + Quant, + Merge, + MatMul, + Conv, + ] for layer in rf_layers: attrs = self.attribute_map.get(layer, []) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 43b57b5e06..341bed11ec 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -452,8 +452,9 @@ class Dense(Layer): ] def initialize(self): - shape = self.get_input_variable().shape[:] + shape = list(self.get_input_variable().shape[:]) shape[-1] = self.attributes['n_out'] + shape = tuple(shape) if len(shape) > 1: dims = [f'N_LAYER_{i}_{self.index}' for i in range(1, len(shape) + 1)] else: diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index af9812449b..3fcd812097 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -1,15 +1,16 @@ -from audioop import bias import numpy as np -from hls4ml.model.optimizer import OptimizerPass + from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant +from hls4ml.model.optimizer import OptimizerPass _base_attributes = ('Trace', 'reuse_factor', 'n_in', 'n_filt') + class BatchNormOnnxConstantParameters(OptimizerPass): - """ Remove Constant from the BatchNormalization node parameters (but not input[0]) """ + """Remove Constant from the BatchNormalization node parameters (but not input[0])""" + def match(self, node): - is_match = (isinstance(node, BatchNormOnnx) - and any(node.inputs[1:])) + is_match = isinstance(node, BatchNormOnnx) and any(node.inputs[1:]) return is_match @@ -60,8 +61,7 @@ def transform(self, model, node): attributes["scale_data"] = scale attributes["bias_data"] = bias - new_node = model.make_node(BatchNormalization, node.name, attributes, - [node.inputs[0]], [x for x in node.outputs]) + new_node = model.make_node(BatchNormalization, node.name, attributes, [node.inputs[0]], [x for x in node.outputs]) model.replace_node(node, new_node) @@ -72,11 +72,14 @@ class ConstantBatchNormFusion(OptimizerPass): """ Merge BatchNorm into Const (after parameters have already been merged in BatchNormalization) """ + def match(self, node): - is_match = (isinstance(node, BatchNormalization) - and not any(node.inputs[1:]) - and isinstance(node.get_input_node(node.inputs[0]), Constant) - and not 
node.get_input_node(node.inputs[0]).get_attr("quant_precision")) + is_match = ( + isinstance(node, BatchNormalization) + and not any(node.inputs[1:]) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[0]).get_attr("quant_precision") + ) return is_match def transform(self, model, node): @@ -88,7 +91,7 @@ def transform(self, model, node): new_val = const_node.value * node.weights["scale"].data_unquantized + node.weights["bias"].data_unquantized const_node.set_attr("value", new_val) const_node.set_attr("quantizer", node.get_attr("quantizer")) # None if not defined - const_node.set_attr("quant_precision", node.get_attr("quant_precision")) + const_node.set_attr("quant_precision", node.get_attr("quant_precision")) # reinitialize (which also runs quantization if quantizer exists) const_node.initialize() @@ -107,9 +110,11 @@ class FuseConsecutiveBatchNormalization(OptimizerPass): def match(self, node): prev_node = node.get_input_node(node.inputs[0]) - basic_match = (isinstance(node, BatchNormalization) - and isinstance(prev_node, BatchNormalization) - and not prev_node.get_attr("quant_precision")) + basic_match = ( + isinstance(node, BatchNormalization) + and isinstance(prev_node, BatchNormalization) + and not prev_node.get_attr("quant_precision") + ) # check for compatibility to merge if basic_match: @@ -118,20 +123,19 @@ def match(self, node): s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized scale_compatible = ( - (prev_node.get_attr("scale_quantizer") is None - and node.get_attr("scale_quantizer") is None) + (prev_node.get_attr("scale_quantizer") is None and node.get_attr("scale_quantizer") is None) or (s0 == np.ones_like(s0)).all() - or (s1 == np.ones_like(s1)).all()) + or (s1 == np.ones_like(s1)).all() + ) bias_compatible = ( - (prev_node.get_attr("bias_quantizer") is None - and node.get_attr("bias_quantizer") is None) + (prev_node.get_attr("bias_quantizer") is None and node.get_attr("bias_quantizer") is None) or (b0 == np.zeros_like(b0)).all() - or (b1 == np.zeros_like(b1)).all()) + or (b1 == np.zeros_like(b1)).all() + ) return scale_compatible and bias_compatible else: return False - def transform(self, model, node): prev_node = node.get_input_node(node.inputs[0]) @@ -140,10 +144,12 @@ def transform(self, model, node): s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized - s_quantizer = (node.get_attr("scale_quantizer") if (s0 == np.ones_like(s0)).all() - else prev_node.get_attr("scale_quantizer")) - b_quantizer = (node.get_attr("bias_quantizer") if (b0 == np.zeros_like(b0)).all() - else prev_node.get_attr("bias_quantizer")) + s_quantizer = ( + node.get_attr("scale_quantizer") if (s0 == np.ones_like(s0)).all() else prev_node.get_attr("scale_quantizer") + ) + b_quantizer = ( + node.get_attr("bias_quantizer") if (b0 == np.zeros_like(b0)).all() else prev_node.get_attr("bias_quantizer") + ) node.set_attr("scale_quantizer", s_quantizer) node.set_attr("bias_quantizer", b_quantizer) From cc0e5b19aa18a2a607d97de9dc6e81994491c54a Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 21 Nov 2022 18:39:55 -0600 Subject: [PATCH 44/51] make steps to support Flatten for Quartus stream --- hls4ml/backends/fpga/fpga_types.py | 230 ++++++++++++------ .../passes/inplace_stream_flatten.py | 5 +- hls4ml/backends/quartus/quartus_backend.py | 3 +- 3 files changed, 163 insertions(+), 75 deletions(-) rename hls4ml/backends/{vivado => fpga}/passes/inplace_stream_flatten.py (94%) diff 
--git a/hls4ml/backends/fpga/fpga_types.py b/hls4ml/backends/fpga/fpga_types.py index fe63473844..ceac0b5e4d 100644 --- a/hls4ml/backends/fpga/fpga_types.py +++ b/hls4ml/backends/fpga/fpga_types.py @@ -1,18 +1,30 @@ import numpy as np -from hls4ml.model.types import CompressedType, NamedType, ExponentType, FixedPrecisionType, IntegerPrecisionType, XnorPrecisionType, ExponentPrecisionType, TensorVariable, PackedType, WeightVariable +from hls4ml.model.types import ( + CompressedType, + ExponentPrecisionType, + ExponentType, + FixedPrecisionType, + IntegerPrecisionType, + NamedType, + PackedType, + XnorPrecisionType, +) -#region Precision types +# region Precision types -class PrecisionDefinition(object): + +class PrecisionDefinition: def definition_cpp(self): raise NotImplementedError + class APIntegerPrecisionDefinition(PrecisionDefinition): def definition_cpp(self): typestring = 'ap_{signed}int<{width}>'.format(signed='u' if not self.signed else '', width=self.width) return typestring + class APFixedPrecisionDefinition(PrecisionDefinition): def _rounding_mode_cpp(self, mode): if mode is not None: @@ -23,16 +35,24 @@ def _saturation_mode_cpp(self, mode): return 'AP_' + str(mode) def definition_cpp(self): - args = [self.width, self.integer, self._rounding_mode_cpp(self.rounding_mode), self._saturation_mode_cpp(self.saturation_mode), self.saturation_bits] + args = [ + self.width, + self.integer, + self._rounding_mode_cpp(self.rounding_mode), + self._saturation_mode_cpp(self.saturation_mode), + self.saturation_bits, + ] args = ','.join([str(arg) for arg in args if arg is not None]) typestring = 'ap_{signed}fixed<{args}>'.format(signed='u' if not self.signed else '', args=args) return typestring + class ACIntegerPrecisionDefinition(PrecisionDefinition): def definition_cpp(self): - typestring = 'ac_int<{width}, {signed}>'.format(width=self.width, signed=str(self.signed).lower()) + typestring = f'ac_int<{self.width}, {str(self.signed).lower()}>' return typestring + class ACFixedPrecisionDefinition(PrecisionDefinition): def _rounding_mode_cpp(self, mode): if mode is not None: @@ -43,15 +63,24 @@ def _saturation_mode_cpp(self, mode): return 'AC_' + str(mode) def definition_cpp(self): - args = [self.width, self.integer, str(self.signed).lower(), self._rounding_mode_cpp(self.rounding_mode), self._saturation_mode_cpp(self.saturation_mode), self.saturation_bits] + args = [ + self.width, + self.integer, + str(self.signed).lower(), + self._rounding_mode_cpp(self.rounding_mode), + self._saturation_mode_cpp(self.saturation_mode), + self.saturation_bits, + ] args = ','.join([str(arg) for arg in args if arg is not None]) - typestring = 'ac_fixed<{args}>'.format(args=args) + typestring = f'ac_fixed<{args}>' return typestring -class PrecisionConverter(object): + +class PrecisionConverter: def convert(self, precision_type): raise NotImplementedError + class FixedPrecisionConverter(PrecisionConverter): def __init__(self, type_map, prefix): self.type_map = type_map @@ -71,7 +100,8 @@ def convert(self, precision_type): precision_type.__class__ = type(self.prefix + type_cls_name, (type_cls, definition_cls), {}) return precision_type else: - raise Exception('Cannot convert precision type to {}: {}'.format(self.prefix, precision_type.__class__.__name__)) + raise Exception(f'Cannot convert precision type to {self.prefix}: {precision_type.__class__.__name__}') + class APTypeConverter(FixedPrecisionConverter): def __init__(self): @@ -82,9 +112,10 @@ def __init__(self): ExponentPrecisionType: 
APIntegerPrecisionDefinition, XnorPrecisionType: APIntegerPrecisionDefinition, }, - prefix='AP' + prefix='AP', ) + class ACTypeConverter(FixedPrecisionConverter): def __init__(self): super().__init__( @@ -94,58 +125,61 @@ def __init__(self): ExponentPrecisionType: ACIntegerPrecisionDefinition, XnorPrecisionType: ACIntegerPrecisionDefinition, }, - prefix='AC' + prefix='AC', ) -#endregion -#region Data types +# endregion -class TypeDefinition(object): +# region Data types + + +class TypeDefinition: def definition_cpp(self): raise NotImplementedError -class TypePrecisionConverter(object): + +class TypePrecisionConverter: def convert_precision(self, precision_converter): self.precision = precision_converter.convert(self.precision) + class NamedTypeConverter(TypeDefinition, TypePrecisionConverter): def definition_cpp(self): - return 'typedef {precision} {name};\n'.format(name=self.name, precision=self.precision.definition_cpp()) + return f'typedef {self.precision.definition_cpp()} {self.name};\n' + class CompressedTypeConverter(TypeDefinition, TypePrecisionConverter): def definition_cpp(self): - cpp_fmt = ( - 'typedef struct {name} {{' - '{index} row_index;' - '{index} col_index;' - '{precision} weight; }} {name};\n' - ) + cpp_fmt = 'typedef struct {name} {{' '{index} row_index;' '{index} col_index;' '{precision} weight; }} {name};\n' return cpp_fmt.format(name=self.name, index=self.index_precision, precision=self.precision.definition_cpp()) def convert_precision(self, precision_converter): super().convert_precision(precision_converter) self.index_precision = precision_converter.convert(self.index_precision) + class ExponentTypeConverter(TypeDefinition, TypePrecisionConverter): def definition_cpp(self): - cpp_fmt = ( - 'typedef struct {name} {{' - '{sign} sign;' - '{precision} weight; }} {name};\n' - ) + cpp_fmt = 'typedef struct {name} {{' '{sign} sign;' '{precision} weight; }} {name};\n' return cpp_fmt.format(name=self.name, precision=self.precision.definition_cpp(), sign=self.sign.definition_cpp()) def convert_precision(self, precision_converter): super().convert_precision(precision_converter) self.sign = precision_converter.convert(self.sign) + class PackedTypeConverter(TypeDefinition, TypePrecisionConverter): def definition_cpp(self): n_elem_expr = '/' if self.unpack else '*' - return 'typedef nnet::array<{precision}, {n_elem}> {name};\n'.format(name=self.name, precision=self.precision.definition_cpp(), n_elem=str(self.n_elem) + n_elem_expr + str(self.n_pack)) + return 'typedef nnet::array<{precision}, {n_elem}> {name};\n'.format( + name=self.name, + precision=self.precision.definition_cpp(), + n_elem=str(self.n_elem) + n_elem_expr + str(self.n_pack), + ) -class HLSTypeConverter(object): + +class HLSTypeConverter: def __init__(self, precision_converter): self.precision_converter = precision_converter self.type_map = { @@ -170,42 +204,54 @@ def convert(self, atype): atype.convert_precision(self.precision_converter) return atype else: - raise Exception('Cannot convert type: {}'.format(atype.__class__.__name__)) + raise Exception(f'Cannot convert type: {atype.__class__.__name__}') + -#endregion +# endregion -#region Variables +# region Variables -class VariableDefinition(object): + +class VariableDefinition: def definition_cpp(self, name_suffix='', as_reference=False): raise NotImplementedError -#region ArrayVariable + +# region ArrayVariable + class VivadoArrayVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - return '{type} 
{name}{suffix}[{shape}]'.format(type=self.type.name, name=self.name, suffix=name_suffix, shape=self.size_cpp()) + return '{type} {name}{suffix}[{shape}]'.format( + type=self.type.name, name=self.name, suffix=name_suffix, shape=self.size_cpp() + ) + class QuartusArrayVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - return '{type} {name}{suffix}[{shape}] {pragma}'.format(type=self.type.name, name=self.name, suffix=name_suffix, shape=self.size_cpp(), pragma=self.pragma) + return '{type} {name}{suffix}[{shape}] {pragma}'.format( + type=self.type.name, name=self.name, suffix=name_suffix, shape=self.size_cpp(), pragma=self.pragma + ) + class VivadoInplaceArrayVariableDefinition(VariableDefinition): def definition_cpp(self): return f'auto& {self.name} = {self.input_var.name}' + class QuartusInplaceArrayVariableDefinition(VariableDefinition): def definition_cpp(self): return f'auto& {self.name} = {self.input_var.name}' -class ArrayVariableConverter(object): + +class ArrayVariableConverter: def __init__(self, type_converter, prefix, definition_cls): self.type_converter = type_converter self.prefix = prefix self.definition_cls = definition_cls def convert(self, tensor_var, pragma='partition'): - if isinstance(tensor_var, self.definition_cls): # Already converted + if isinstance(tensor_var, self.definition_cls): # Already converted return tensor_var tensor_var.pragma = pragma @@ -214,38 +260,49 @@ def convert(self, tensor_var, pragma='partition'): tensor_var.__class__ = type(self.prefix + 'ArrayVariable', (type(tensor_var), self.definition_cls), {}) return tensor_var + class VivadoArrayVariableConverter(ArrayVariableConverter): def __init__(self, type_converter): super().__init__(type_converter=type_converter, prefix='Vivado', definition_cls=VivadoArrayVariableDefinition) + class QuartusArrayVariableConverter(ArrayVariableConverter): def __init__(self, type_converter): super().__init__(type_converter=type_converter, prefix='Quartus', definition_cls=QuartusArrayVariableDefinition) + class VivadoInplaceArrayVariableConverter(ArrayVariableConverter): def __init__(self, type_converter): super().__init__(type_converter=type_converter, prefix='Vivado', definition_cls=VivadoInplaceArrayVariableDefinition) + class QuartusInplaceArrayVariableConverter(ArrayVariableConverter): def __init__(self, type_converter): - super().__init__(type_converter=type_converter, prefix='Quartus', definition_cls=QuartusInplaceArrayVariableDefinition) + super().__init__( + type_converter=type_converter, prefix='Quartus', definition_cls=QuartusInplaceArrayVariableDefinition + ) + -#endregion +# endregion + +# region StructMemberVariable -#region StructMemberVariable class QuartusStructMemberVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - return '{type} {name}{suffix}[{shape}]'.format(type=self.type.name, name=self.member_name, suffix=name_suffix, shape=self.size_cpp()) + return '{type} {name}{suffix}[{shape}]'.format( + type=self.type.name, name=self.member_name, suffix=name_suffix, shape=self.size_cpp() + ) -class StructMemberVariableConverter(object): + +class StructMemberVariableConverter: def __init__(self, type_converter, prefix, definition_cls): self.type_converter = type_converter self.prefix = prefix self.definition_cls = definition_cls def convert(self, tensor_var, pragma='partition', struct_name=None): - if isinstance(tensor_var, self.definition_cls): # Already converted + if isinstance(tensor_var, 
self.definition_cls): # Already converted return tensor_var tensor_var.pragma = pragma @@ -259,20 +316,28 @@ def convert(self, tensor_var, pragma='partition', struct_name=None): tensor_var.__class__ = type(self.prefix + 'StructMemberVariable', (type(tensor_var), self.definition_cls), {}) return tensor_var + class QuartusStructMemberVariableConverter(StructMemberVariableConverter): def __init__(self, type_converter): - super().__init__(type_converter=type_converter, prefix='Quartus', definition_cls=QuartusStructMemberVariableDefinition) + super().__init__( + type_converter=type_converter, prefix='Quartus', definition_cls=QuartusStructMemberVariableDefinition + ) + + +# endregion -#endregion +# region StreamVariable -#region StreamVariable class VivadoStreamVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - if as_reference: # Function parameter - return 'hls::stream<{type}> &{name}{suffix}'.format(type=self.type.name, name=self.name, suffix=name_suffix) - else: # Declaration - return 'hls::stream<{type}> {name}{suffix}("{name}")'.format(type=self.type.name, name=self.name, suffix=name_suffix) + if as_reference: # Function parameter + return f'hls::stream<{self.type.name}> &{self.name}{name_suffix}' + else: # Declaration + return 'hls::stream<{type}> {name}{suffix}("{name}")'.format( + type=self.type.name, name=self.name, suffix=name_suffix + ) + class VivadoInplaceStreamVariableDefinition(VariableDefinition): def definition_cpp(self): @@ -281,78 +346,97 @@ def definition_cpp(self): class QuartusStreamVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - if as_reference: # Function parameter - return 'stream<{type}> &{name}{suffix}'.format(type=self.type.name, name=self.name, suffix=name_suffix) - else: # Declaration - return 'stream<{type}> {name}{suffix}'.format(type=self.type.name, name=self.name, suffix=name_suffix) + if as_reference: # Function parameter + return f'stream<{self.type.name}> &{self.name}{name_suffix}' + else: # Declaration + return f'stream<{self.type.name}> {self.name}{name_suffix}' + class QuartusInplaceStreamVariableDefinition(VariableDefinition): def definition_cpp(self): return f'auto& {self.name} = {self.input_var.name}' -class StreamVariableConverter(object): + +class StreamVariableConverter: def __init__(self, type_converter, prefix, definition_cls): self.type_converter = type_converter self.prefix = prefix self.definition_cls = definition_cls def convert(self, tensor_var, n_pack=1, depth=0): - if isinstance(tensor_var, self.definition_cls): # Already converted + if isinstance(tensor_var, self.definition_cls): # Already converted return tensor_var if depth == 0: depth = np.prod(tensor_var.shape) // tensor_var.shape[-1] tensor_var.pragma = ('stream', depth) - tensor_var.type = self.type_converter.convert(PackedType(tensor_var.type.name, tensor_var.type.precision, tensor_var.shape[-1], n_pack)) + tensor_var.type = self.type_converter.convert( + PackedType(tensor_var.type.name, tensor_var.type.precision, tensor_var.shape[-1], n_pack) + ) tensor_var.__class__ = type(self.prefix + 'StreamVariable', (type(tensor_var), self.definition_cls), {}) return tensor_var + class VivadoStreamVariableConverter(StreamVariableConverter): def __init__(self, type_converter): super().__init__(type_converter=type_converter, prefix='Vivado', definition_cls=VivadoStreamVariableDefinition) + class QuartusStreamVariableConverter(StreamVariableConverter): def __init__(self, type_converter): 
super().__init__(type_converter=type_converter, prefix='Quartus', definition_cls=QuartusStreamVariableDefinition) -#endregion -#region InplaceStreamVariable +# endregion + +# region InplaceStreamVariable + class InplaceStreamVariableConverter(StreamVariableConverter): def convert(self, tensor_var, n_pack=1, depth=0): - if isinstance(tensor_var, self.definition_cls): # Already converted + if isinstance(tensor_var, self.definition_cls): # Already converted return tensor_var tensor_var.pragma = None - tensor_var.type = self.type_converter.convert(PackedType(tensor_var.type.name, tensor_var.type.precision, tensor_var.input_var.shape[-1], n_pack)) + tensor_var.type = self.type_converter.convert( + PackedType(tensor_var.type.name, tensor_var.type.precision, tensor_var.input_var.shape[-1], n_pack) + ) tensor_var.__class__ = type(self.prefix + 'StreamVariable', (type(tensor_var), self.definition_cls), {}) return tensor_var + class VivadoInplaceStreamVariableConverter(InplaceStreamVariableConverter): def __init__(self, type_converter): - super().__init__(type_converter=type_converter, prefix='Vivado', definition_cls=VivadoInplaceStreamVariableDefinition) + super().__init__( + type_converter=type_converter, prefix='Vivado', definition_cls=VivadoInplaceStreamVariableDefinition + ) + class QuartusInplaceStreamVariableConverter(InplaceStreamVariableConverter): def __init__(self, type_converter): - super().__init__(type_converter=type_converter, prefix='Quartus', definition_cls=VivadoInplaceStreamVariableDefinition) + super().__init__( + type_converter=type_converter, prefix='Quartus', definition_cls=QuartusInplaceStreamVariableDefinition + ) + -#endregion +# endregion + +# region WeightsVariable -#region WeightsVariable class StaticWeightVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - return '{type} {name}[{size}]'.format(type=self.type.name, name=self.name, size=self.data_length) + return f'{self.type.name} {self.name}[{self.data_length}]' + -class StaticWeightVariableConverter(object): +class StaticWeightVariableConverter: def __init__(self, type_converter): self.type_converter = type_converter def convert(self, weight_var): - if isinstance(weight_var, StaticWeightVariableDefinition): # Already converted + if isinstance(weight_var, StaticWeightVariableDefinition): # Already converted return weight_var weight_var.weight_class = weight_var.__class__.__name__ @@ -362,12 +446,14 @@ def convert(self, weight_var): weight_var.__class__ = type('StaticWeightVariable', (type(weight_var), StaticWeightVariableDefinition), {}) return weight_var -class BramWeightVariableConverter(object): + +class BramWeightVariableConverter: @classmethod def convert(cls, weight_var): weight_var.storage = 'bram' return weight_var -#endregion -#endregion +# endregion + +# endregion diff --git a/hls4ml/backends/vivado/passes/inplace_stream_flatten.py b/hls4ml/backends/fpga/passes/inplace_stream_flatten.py similarity index 94% rename from hls4ml/backends/vivado/passes/inplace_stream_flatten.py rename to hls4ml/backends/fpga/passes/inplace_stream_flatten.py index 7ab44927e6..6322808400 100644 --- a/hls4ml/backends/vivado/passes/inplace_stream_flatten.py +++ b/hls4ml/backends/fpga/passes/inplace_stream_flatten.py @@ -1,10 +1,11 @@ -from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.layers import Reshape +from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.types import InplaceTensorVariable class InplaceStreamFlatten(OptimizerPass): - ''' Remove 
Flatten layer in io_stream ''' + '''Remove Flatten layer in io_stream''' + def match(self, node): # optimizer pass for a flatten layer (1 output dimension) return isinstance(node, Reshape) and len(node.get_output_variable().shape) == 1 diff --git a/hls4ml/backends/quartus/quartus_backend.py b/hls4ml/backends/quartus/quartus_backend.py index c3258ec703..bbc5db694f 100644 --- a/hls4ml/backends/quartus/quartus_backend.py +++ b/hls4ml/backends/quartus/quartus_backend.py @@ -64,8 +64,9 @@ def _register_flows(self): optimization_passes = [ 'quartus:remove_final_reshape', - 'quartus:inplace_parallel_reshape', 'quartus:optimize_pointwise_conv', + 'quartus:inplace_parallel_reshape', + 'quartus:inplace_stream_flatten', 'quartus:skip_softmax', ] optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name) From 01104143e754b82264e4061cff4954159b263c53 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Jan 2023 16:51:31 -0600 Subject: [PATCH 45/51] precommint fixes for onnx converters --- hls4ml/converters/onnx/convolution.py | 25 +++++---- hls4ml/converters/onnx/core.py | 60 ++++++++++++++------ hls4ml/converters/onnx/merge.py | 18 +++--- hls4ml/converters/onnx/pooling.py | 38 ++++++++----- hls4ml/converters/onnx/quantizer.py | 14 +++-- hls4ml/converters/onnx/reshape.py | 10 ++-- hls4ml/converters/onnx_to_hls.py | 79 +++++++++++++++------------ 7 files changed, 145 insertions(+), 99 deletions(-) diff --git a/hls4ml/converters/onnx/convolution.py b/hls4ml/converters/onnx/convolution.py index d96b42351f..f12860579c 100644 --- a/hls4ml/converters/onnx/convolution.py +++ b/hls4ml/converters/onnx/convolution.py @@ -1,6 +1,7 @@ import numpy as np -from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute, compute_pads_1d, compute_pads_2d -from hls4ml.converters.utils import compute_padding_1d, compute_padding_2d + +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler + @onnx_handler('Conv') def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config): @@ -9,10 +10,10 @@ def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config): layer['name'] = node.name if node.domain != 'qonnx.custom_op.channels_last': raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last") - layer['data_format'] = 'channels_last' # QONNX needs to be channels-last. + layer['data_format'] = 'channels_last' # QONNX needs to be channels-last. 
layer['inputs'] = node.input layer['outputs'] = node.output - #reader.add_input(layer['name'], node.input) + # reader.add_input(layer['name'], node.input) strides = get_onnx_attribute(node, 'strides') kernel_shape = get_onnx_attribute(node, 'kernel_shape') @@ -20,7 +21,7 @@ def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config): pads = get_onnx_attribute(node, 'pads') dilations = get_onnx_attribute(node, 'dilations') if dilations is None: - dilations = [1]*len(layer['kernel_shape']) + dilations = [1] * len(layer['kernel_shape']) if get_onnx_attribute(node, 'group') != 1: raise ValueError("Only 1 group supported corrently") @@ -34,7 +35,7 @@ def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config): raise ValueError("Only 1D and 2D convolutions are supported") layer['class_name'] = 'Conv' - #set some values needed later + # set some values needed later if layer['n_dim'] == 1: # this is 1D convolution full_width = layer['in_width'] + pads[0] + pads[1] @@ -63,11 +64,11 @@ def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config): layer['pad_left'] = pads[1] layer['pad_bottom'] = pads[2] layer['pad_right'] = pads[3] - layer['filt_height'] = kernel_shape[0] - layer['filt_width'] = kernel_shape[1] - layer['stride_height'] = strides[0] - layer['stride_width'] = strides[1] - layer['dilation_height'] = dilations[0] - layer['dilation_width'] = dilations[1] + layer['filt_height'] = kernel_shape[0] + layer['filt_width'] = kernel_shape[1] + layer['stride_height'] = strides[0] + layer['stride_width'] = strides[1] + layer['dilation_height'] = dilations[0] + layer['dilation_width'] = dilations[1] return layer diff --git a/hls4ml/converters/onnx/core.py b/hls4ml/converters/onnx/core.py index 4fb7e667de..f56c63786e 100644 --- a/hls4ml/converters/onnx/core.py +++ b/hls4ml/converters/onnx/core.py @@ -1,5 +1,6 @@ import numpy as np -from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute + +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler @onnx_handler('MatMul') @@ -14,18 +15,41 @@ def parse_matmul_layer(reader, node, inputs_map, input_shapes, graph, config): return layer -#------------------Global paras for activations + # TODO: repair HardSigmoid support # https://github.com/fastmachinelearning/hls4ml/issues/409 -#activation_layers = ['Relu', 'Tanh', 'Sigmoid', 'LeakyRelu', 'ThresholdedRelu', 'HardSigmoid', 'Elu', 'Selu', 'PRelu', 'Softmax', 'Softsign', 'Softplus', 'Clip'] -activation_layers = ['Relu', 'Tanh', 'Sigmoid', 'LeakyRelu', 'ThresholdedRelu', 'Elu', 'Selu', 'PRelu', 'Softmax', 'Softsign', 'Softplus', 'Clip'] +activation_layers = [ + 'Relu', + 'Tanh', + 'Sigmoid', + 'LeakyRelu', + 'ThresholdedRelu', + 'Elu', + 'Selu', + 'PRelu', + 'Softmax', + 'Softsign', + 'Softplus', + 'Clip', +] + +activation_map = { + 'Relu': 'ReLU', + 'Tanh': 'Activation', + 'Sigmoid': 'Activation', + 'LeakyRelu': 'LeakyReLU', + 'ThresholdedRelu': 'ThresholdedReLU', + 'HardSigmoid': 'Activation', + 'Elu': 'ELU', + 'Selu': 'Activation', + 'PRelu': 'PReLU', + 'Softmax': 'Softmax', + 'Softsign': 'Activation', + 'Softplus': 'Activation', + 'Clip': 'Clip', +} +# --------- -activation_map = {'Relu':'ReLU', 'Tanh':'Activation', - 'Sigmoid':'Activation', 'LeakyRelu':'LeakyReLU', - 'ThresholdedRelu':'ThresholdedReLU', 'HardSigmoid':'Activation', - 'Elu':'ELU', 'Selu':'Activation', 'PRelu':'PReLU', 'Softmax':'Softmax', - 'Softsign':'Activation', 'Softplus':'Activation', 'Clip':'Clip'} -#--------- 
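For illustration, a minimal sketch of how the activation_layers list and activation_map above are intended to be used when translating an ONNX node; the op type below is just an example, not taken from a real model.

    # assumes activation_layers and activation_map as defined above
    node_op_type = 'LeakyRelu'  # example ONNX op type
    if node_op_type in activation_layers:
        hls4ml_class = activation_map[node_op_type]
        print(hls4ml_class)  # prints 'LeakyReLU', the corresponding hls4ml class name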
@onnx_handler(*activation_layers) def parse_activation_layer(reader, node, inputs_map, input_shapes, graph, config): @@ -51,10 +75,10 @@ def parse_activation_layer(reader, node, inputs_map, input_shapes, graph, config elif layer['class_name'] == 'Clip': clip_min_node = [x for x in graph.initializer if x.name in node.input] - clip_min = clip_min_node[0].float_data[0] + clip_min = clip_min_node[0].float_data[0] - #Check if it's relu or not - if clip_min == 0.: + # Check if it's relu or not + if clip_min == 0.0: layer['class_name'] = 'Activation' layer['activation'] = 'ReLU' else: @@ -66,6 +90,7 @@ def parse_activation_layer(reader, node, inputs_map, input_shapes, graph, config return layer + @onnx_handler('BatchNormalization') def parse_batchnorm_layer(reader, node, inputs_map, input_shapes, graph, config): @@ -76,7 +101,7 @@ def parse_batchnorm_layer(reader, node, inputs_map, input_shapes, graph, config) layer['inputs'] = node.input layer['outputs'] = node.output - #Other attributes + # Other attributes layer['epsilon'] = get_onnx_attribute(node, 'epsilon', 1e-05) # layer['momentum'] = get_onnx_attribute(node, 'momentum', 0.9) # not used @@ -87,13 +112,14 @@ def parse_batchnorm_layer(reader, node, inputs_map, input_shapes, graph, config) elif len(input_shapes[0]) > 2: if node.domain != 'qonnx.custom_op.channels_last': raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last") - layer['data_format'] = 'channels_last' # QONNX needs to be channels-last. - layer['n_filt']= input_shapes[0][-1] + layer['data_format'] = 'channels_last' # QONNX needs to be channels-last. + layer['n_filt'] = input_shapes[0][-1] else: raise RuntimeError(f"Unexpected input shape: {input_shapes[0]}") return layer + @onnx_handler('Quant') def parse_quant_layer(reader, node, inputs_map, input_shapes, graph, config): @@ -104,7 +130,7 @@ def parse_quant_layer(reader, node, inputs_map, input_shapes, graph, config): layer['inputs'] = node.input layer['outputs'] = node.output - #Other attributes + # Other attributes layer['narrow'] = bool(get_onnx_attribute(node, 'narrow')) layer['rounding_mode'] = get_onnx_attribute(node, 'rounding_mode') layer['signed'] = bool(get_onnx_attribute(node, 'signed')) diff --git a/hls4ml/converters/onnx/merge.py b/hls4ml/converters/onnx/merge.py index 4c83b1b06d..0171ee9ace 100644 --- a/hls4ml/converters/onnx/merge.py +++ b/hls4ml/converters/onnx/merge.py @@ -1,6 +1,8 @@ -from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler merge_layers = ['Add', 'Sub', 'Mul', 'Div', 'Average', 'Max', 'Min', 'Concat', 'Sum'] + + @onnx_handler(*merge_layers) def parse_merge_layer(reader, node, inputs_map, input_shapes, graph, config): @@ -17,15 +19,11 @@ def parse_merge_layer(reader, node, inputs_map, input_shapes, graph, config): raise Exception('ERROR: Concatenation of tensors with rank > 3 is not yet supported.') layer['class_name'] = 'Concatenate' - layer['op'] = layer['class_name'].lower() + '{}d'.format(rank) + layer['op'] = layer['class_name'].lower() + f'{rank}d' layer['axis'] = get_onnx_attribute(node, 'axis') - # #Calculate output shape - # new_dim = sum([x.type.tensor_type.shape.dim[layer['axis']].dim_value for x in graph.value_info if x.name in node.input]) - # output_shape[layer['axis']] = new_dim - - elif layer['class_name'] == 'Add': - #Check if the layer is an AddBias + elif layer['class_name'] == 'Add': + # Check if the layer is an AddBias for input in 
node.input: # I think we don't use BiasAdd in ONNX currently if "bias" in input: @@ -35,7 +33,7 @@ def parse_merge_layer(reader, node, inputs_map, input_shapes, graph, config): # reader.add_input(layer['name'], node.input) reader.add_input(layer['name'], input) - if layer['class_name'] == 'Add': + if layer['class_name'] == 'Add': # If it wasn't changed, just make it a merge node layer['class_name'] = 'Merge' @@ -45,4 +43,4 @@ def parse_merge_layer(reader, node, inputs_map, input_shapes, graph, config): if len(layer['inputs']) > 2: raise Exception('ERROR: Merging more than two tensors is not yet supported.') - return layer \ No newline at end of file + return layer diff --git a/hls4ml/converters/onnx/pooling.py b/hls4ml/converters/onnx/pooling.py index 594614fcc0..e72f709966 100644 --- a/hls4ml/converters/onnx/pooling.py +++ b/hls4ml/converters/onnx/pooling.py @@ -1,8 +1,10 @@ import numpy as np -from hls4ml.converters.onnx_to_hls import onnx_handler, get_onnx_attribute, compute_pads_1d, compute_pads_2d -from hls4ml.converters.utils import compute_padding_1d, compute_padding_2d + +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler pool_operations = ['AveragePool', 'MaxPool'] + + @onnx_handler(*pool_operations) def parse_pool_layer(reader, node, inputs_map, input_shapes, graph, config): @@ -13,7 +15,7 @@ def parse_pool_layer(reader, node, inputs_map, input_shapes, graph, config): if node.domain != 'qonnx.custom_op.channels_last': raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last") layer['class_name'] = node.op_type - layer['data_format'] = 'channels_last' #Default QONNX + layer['data_format'] = 'channels_last' # Default QONNX info = layer['class_name'].replace('Pool', '') strides = get_onnx_attribute(node, 'strides') @@ -22,10 +24,10 @@ def parse_pool_layer(reader, node, inputs_map, input_shapes, graph, config): layer['pads'] = pads dilations = get_onnx_attribute(node, 'dilations') if dilations is None: - dilations = [1]*len(kernel_shape) + dilations = [1] * len(kernel_shape) layer['dilations'] = dilations - if len(input_shapes[0]) == 3: # 1D + if len(input_shapes[0]) == 3: # 1D layer['class_name'] = info + 'Pooling1D' layer['n_filt'] = input_shapes[0][1] @@ -35,10 +37,11 @@ def parse_pool_layer(reader, node, inputs_map, input_shapes, graph, config): layer['stride_width'] = strides[0] # formula from ONNX Operators.md documentation - layer['n_out'] = int(np.floor((layer['n_in'] + np.sum(pads) - ((kernel_shape[0] - 1) * dilations[0] + 1)) / strides[0] + 1)) - + layer['n_out'] = int( + np.floor((layer['n_in'] + np.sum(pads) - ((kernel_shape[0] - 1) * dilations[0] + 1)) / strides[0] + 1) + ) - elif len(input_shapes[0]) == 4: # 2D + elif len(input_shapes[0]) == 4: # 2D layer['class_name'] = info + 'Pooling2D' layer['n_filt'] = input_shapes[0][3] @@ -56,14 +59,19 @@ def parse_pool_layer(reader, node, inputs_map, input_shapes, graph, config): layer['pad_right'] = pads[3] # formula from ONNX Operators.md documentation - layer['out_height'] = int(np.floor((layer['in_height'] + pads[0] + pads[2] - ((kernel_shape[0] - 1) * dilations[0] + 1)) - / strides[0] + 1)) - layer['out_width'] = int(np.floor((layer['in_width'] + pads[1] + pads[3] - ((kernel_shape[1] - 1) * dilations[1] + 1)) - / strides[1] + 1)) + layer['out_height'] = int( + np.floor((layer['in_height'] + pads[0] + pads[2] - ((kernel_shape[0] - 1) * dilations[0] + 1)) / strides[0] + 1) + ) + layer['out_width'] = int( + np.floor((layer['in_width'] + pads[1] + pads[3] - 
((kernel_shape[1] - 1) * dilations[1] + 1)) / strides[1] + 1) + ) return layer + global_pooling_layers = ['GlobalMaxPool', 'GlobalAveragePool'] + + @onnx_handler(*global_pooling_layers) def parse_global_pooling_layer(reader, node, inputs_map, input_shapes, graph, config): @@ -74,14 +82,14 @@ def parse_global_pooling_layer(reader, node, inputs_map, input_shapes, graph, co layer['class_name'] = node.op_type layer['data_format'] = 'channels_first' - #Sonme default parameters for global pooling + # Sonme default parameters for global pooling layer['n_out'] = 1 layer['pad_left'] = layer['pad_right'] = 0 layer['stride'] = 0 info = layer['class_name'].replace('Pool', '') - if len(input_shapes[0]) == 3: # 1D + if len(input_shapes[0]) == 3: # 1D layer['class_name'] = info + 'Pooling1D' layer['n_in'] = input_shapes[0][2] @@ -94,4 +102,4 @@ def parse_global_pooling_layer(reader, node, inputs_map, input_shapes, graph, co layer['in_height'] = input_shapes[0][2] layer['in_width'] = input_shapes[0][3] - return layer \ No newline at end of file + return layer diff --git a/hls4ml/converters/onnx/quantizer.py b/hls4ml/converters/onnx/quantizer.py index 694eb42967..7f69652c04 100644 --- a/hls4ml/converters/onnx/quantizer.py +++ b/hls4ml/converters/onnx/quantizer.py @@ -6,17 +6,20 @@ """ import numpy as np -from hls4ml.model.types import Quantizer, SaturationMode, RoundingMode + +from hls4ml.model.types import Quantizer, RoundingMode, SaturationMode + class QuantNodeQuantizer(Quantizer): - """ This implements a quantizer for a FixedPrecisionType with width==integer""" + """This implements a quantizer for a FixedPrecisionType with width==integer""" + def __init__(self, precision): super().__init__(precision.width, precision) def __call__(self, data): - """ Apply the quantization on the data """ + """Apply the quantization on the data""" - scale = 2**(self.hls_type.width - self.hls_type.integer) + scale = 2 ** (self.hls_type.width - self.hls_type.integer) data = data * scale # (not using *= to avoid modifying data) # Clamping @@ -28,7 +31,6 @@ def __call__(self, data): rounding_fx = self._resolve_rounding_mode(self.hls_type.rounding_mode) return rounding_fx(data) / scale - @staticmethod def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: """Compute the minimum integer representable by a given number of bits. 
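
For a concrete feel of the arithmetic in `__call__` above, the standalone sketch below replays it with plain numpy for an assumed ap_fixed<8,4>-style precision (signed, saturating, convergent rounding); the values are made up for illustration, and the real class takes them from the FixedPrecisionType.

    import numpy as np

    width, integer, signed = 8, 4, True                            # assumed precision
    scale = 2.0 ** (width - integer)                               # 16.0
    min_int = -(2 ** (width - 1)) if signed else 0                 # -128
    max_int = 2 ** (width - 1) - 1 if signed else 2 ** width - 1   # 127

    data = np.array([1.234, -9.0, 0.07])
    clamped = np.clip(data * scale, min_int, max_int)   # scale, then saturate
    rounded = np.round(clamped)                         # round half to even (RND_CONV)
    print(rounded / scale)                              # [ 1.25   -8.      0.0625]
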
@@ -76,7 +78,7 @@ def _max_int(signed: bool, bit_width: int) -> int: int(255) """ if not signed: - value = (2 ** bit_width) - 1 + value = (2**bit_width) - 1 else: value = (2 ** (bit_width - 1)) - 1 return value diff --git a/hls4ml/converters/onnx/reshape.py b/hls4ml/converters/onnx/reshape.py index 8f5127538f..29eb735946 100644 --- a/hls4ml/converters/onnx/reshape.py +++ b/hls4ml/converters/onnx/reshape.py @@ -1,5 +1,5 @@ from hls4ml.converters.onnx_to_hls import onnx_handler -import numpy as np + @onnx_handler('Transpose') def parse_transpose_layer(reader, node, inputs_map, input_shapes, graph, config): @@ -10,11 +10,12 @@ def parse_transpose_layer(reader, node, inputs_map, input_shapes, graph, config) layer['inputs'] = node.input layer['outputs'] = node.output - perm = [list(i.ints) for i in node.attribute][0] #This will get something like [[a,b,c]][0] = [a,b,c] - layer['perm'] = [x - 1 for x in perm[1:]] #Ignore the batch dimension in ONNX, and adjust the perm indexing + perm = [list(i.ints) for i in node.attribute][0] # This will get something like [[a,b,c]][0] = [a,b,c] + layer['perm'] = [x - 1 for x in perm[1:]] # Ignore the batch dimension in ONNX, and adjust the perm indexing return layer + @onnx_handler('Reshape') def parse_reshape_layer(reader, node, inputs_map, input_shapes, graph, config): @@ -26,6 +27,7 @@ def parse_reshape_layer(reader, node, inputs_map, input_shapes, graph, config): return layer + @onnx_handler('Flatten') def parse_flatten_layer(reader, node, inputs_map, input_shapes, graph, config): @@ -36,4 +38,4 @@ def parse_flatten_layer(reader, node, inputs_map, input_shapes, graph, config): layer['outputs'] = node.output layer['target_shape'] = [-1] - return layer \ No newline at end of file + return layer diff --git a/hls4ml/converters/onnx_to_hls.py b/hls4ml/converters/onnx_to_hls.py index 4810d01c7b..a7d7c3e32a 100644 --- a/hls4ml/converters/onnx_to_hls.py +++ b/hls4ml/converters/onnx_to_hls.py @@ -1,27 +1,28 @@ -import numpy as np import onnx -from onnx import helper, numpy_helper, shape_inference +from onnx import helper, numpy_helper from hls4ml.model import ModelGraph MAXMULT = 4096 + class ONNXDataReader: """ ONNX data reader to be used for extracting relevant information during conversion. 
""" + def __init__(self, model): self.model = model self.input_map = {} self.index_map = { # Dense - 'kernel' : 1, - 'bias' : 2, + 'kernel': 1, + 'bias': 2, # BatchNormalization - 'gamma' : 1, - 'beta' : 2, - 'moving_mean' : 3, - 'moving_variance' : 4, + 'gamma': 1, + 'beta': 2, + 'moving_mean': 3, + 'moving_variance': 4, } def get_weights_data(self, layer_name, var_name): @@ -40,8 +41,8 @@ def get_weights_data(self, layer_name, var_name): extracted weights data """ - #Get the node associated with the layer name - node = next((node for node in self.model.graph.node if node.name == layer_name)) + # Get the node associated with the layer name + node = next(node for node in self.model.graph.node if node.name == layer_name) inputs = self.input_map[layer_name] inp_idx = self.index_map[var_name] @@ -69,9 +70,10 @@ def get_weights_data(self, layer_name, var_name): return data def add_input(self, layer_name, inputs, transpose=True, perm=None): - self.input_map[layer_name] = { 'inputs': inputs, 'transpose': transpose, 'perm': perm } + self.input_map[layer_name] = {'inputs': inputs, 'transpose': transpose, 'perm': perm} -####----------------------Helpers---------------------###### + +# ----------------------Helpers--------------------- def sanitize_layer_name(layer): new_name = layer['name'] if new_name[0].isdigit(): @@ -79,11 +81,13 @@ def sanitize_layer_name(layer): layer['name'] = new_name + def replace_char_inconsitency(name): """ Replace some inconsistent characters that cause issues when writing into HLS. """ - return name.replace('.','_') + return name.replace('.', '_') + def get_onnx_attribute(operation, name, default=None): attr = next((x for x in operation.attribute if x.name == name), None) @@ -109,12 +113,12 @@ def get_global_input_shape(graph, inp): Raises: StopIteration: If the global input name is not found """ - inp_shape = next((x.type.tensor_type.shape.dim for x in graph.input if x.name == inp)) + inp_shape = next(x.type.tensor_type.shape.dim for x in graph.input if x.name == inp) return tuple(x.dim_value for x in inp_shape) def get_input_shape(graph, node): - """ Return the input shapes of the node in the model + """Return the input shapes of the node in the model Arguments: graph: the onnx graph @@ -147,7 +151,7 @@ def get_constant_value(graph, constant_name): def compute_pads_1d(operation, layer): auto_pad = get_onnx_attribute(operation, 'auto_pad', 'NOTSET') if auto_pad != 'NOTSET': - if (layer['in_width'] % layer['stride_width'] == 0): + if layer['in_width'] % layer['stride_width'] == 0: pad_along_width = max(layer['filt_width'] - layer['stride_width'], 0) else: pad_along_width = max(layer['filt_width'] - (layer['in_width'] % layer['stride_width']), 0) @@ -158,7 +162,7 @@ def compute_pads_1d(operation, layer): pads = sorted(pads) elif auto_pad == 'SAME_LOWER': pads = sorted(pads, reverse=True) - else: # 'VALID' padding + else: # 'VALID' padding pads = [0, 0] else: pads = get_onnx_attribute(operation, 'pads', [0, 0]) @@ -169,15 +173,15 @@ def compute_pads_1d(operation, layer): def compute_pads_2d(operation, layer): auto_pad = get_onnx_attribute(operation, 'auto_pad', 'NOTSET') if auto_pad != 'NOTSET': - #Height - if (layer['in_height'] % layer['stride_height'] == 0): + # Height + if layer['in_height'] % layer['stride_height'] == 0: pad_along_height = max(layer['filt_height'] - layer['stride_height'], 0) else: pad_along_height = max(layer['filt_height'] - (layer['in_height'] % layer['stride_height']), 0) pad_height = [pad_along_height // 2, pad_along_height - pad_along_height // 
2] - #Width - if (layer['in_width'] % layer['stride_width'] == 0): + # Width + if layer['in_width'] % layer['stride_width'] == 0: pad_along_width = max(layer['filt_width'] - layer['stride_width'], 0) else: pad_along_width = max(layer['filt_width'] - (layer['in_width'] % layer['stride_width']), 0) @@ -187,31 +191,37 @@ def compute_pads_2d(operation, layer): pads = [min(pad_height), min(pad_width), max(pad_height), max(pad_width)] elif auto_pad == 'SAME_LOWER': pads = [max(pad_height), max(pad_width), min(pad_height), min(pad_width)] - else: # 'VALID' padding + else: # 'VALID' padding pads = [0, 0, 0, 0] else: pads = get_onnx_attribute(operation, 'pads', [0, 0, 0, 0]) return pads -####----------------------Layer handling---------------------###### + +# ----------------------Layer handling--------------------- layer_handlers = {} + def register_onnx_layer_handler(layer_name, handler_func): if layer_name in layer_handlers: - raise Exception('Layer {} already registered'.format(layer_name)) + raise Exception(f'Layer {layer_name} already registered') else: layer_handlers[layer_name] = handler_func + def get_supported_onnx_layers(): return list(layer_handlers.keys()) + def onnx_handler(*args): def decorator(function): function.handles = [arg for arg in args] return function + return decorator + def get_out_layer_name(graph): """ Get the output layer's name for the model. @@ -222,7 +232,7 @@ def get_out_layer_name(graph): def onnx_to_hls(config): - """ Convert onnx model to hls model from configuration. + """Convert onnx model to hls model from configuration. Args: config: @@ -247,7 +257,7 @@ def onnx_to_hls(config): reader = ONNXDataReader(model) - #Obtain list of input/ouput layers + # Obtain list of input/ouput layers all_inputs = [x.name for x in model.graph.input] all_initializers = [x.name for x in model.graph.initializer] input_layers = [x for x in all_inputs if x not in all_initializers] @@ -275,7 +285,7 @@ def onnx_to_hls(config): constant_layer['class_name'] = 'Constant' constant_layer['value'] = get_constant_value(model.graph, constant) - #Clean the layer name for specific models + # Clean the layer name for specific models sanitize_layer_name(constant_layer) constant_layers[i] = constant_layer['name'] @@ -284,7 +294,7 @@ def onnx_to_hls(config): # Defined supported layers and check for unsupported layer type skip_layers = ['Dropout', 'Identity'] - #Map inputs of skipped layers + # Map inputs of skipped layers inputs_map = {} supported_layers = get_supported_onnx_layers() + skip_layers @@ -293,30 +303,29 @@ def onnx_to_hls(config): for node in model.graph.node: if node.op_type not in supported_layers: - raise Exception('ERROR: Unsupported operation type: {}'.format(node.op_type)) + raise Exception(f'ERROR: Unsupported operation type: {node.op_type}') current_shape = get_input_shape(model.graph, node) if node.op_type in skip_layers: - #Currently supported skipped layers have only one input and output - #Skipped layers can follow each other (e.g., Dropout -> Flatten) + # Currently supported skipped layers have only one input and output + # Skipped layers can follow each other (e.g., Dropout -> Flatten) - #Mapping inputs + # Mapping inputs input_name = inputs_map.get(node.input[0], node.input[0]) output_name = node.output[0] inputs_map[output_name] = input_name continue - #Process the layer + # Process the layer layer = layer_handlers[node.op_type](reader, node, inputs_map, current_shape, model.graph, config) sanitize_layer_name(layer) print('Layer name: {}, layer type: {}, current shape: 
{}'.format(layer['name'], layer['class_name'], current_shape)) layer_list.append(layer) - ################# - ## Generate HLS + # Generate HLS ################# print('Creating HLS model') From 6d693cc720c973345c38761e424442930db02409 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Jan 2023 19:08:48 -0600 Subject: [PATCH 46/51] apply pre-commit on optimizer passes --- hls4ml/model/optimizer/passes/bn_fuse.py | 41 ++++---- .../model/optimizer/passes/conv_to_convxd.py | 66 +++++++++---- .../optimizer/passes/matmul_const_to_dense.py | 50 +++++----- hls4ml/model/optimizer/passes/merge_const.py | 98 ++++++++++--------- hls4ml/model/optimizer/passes/move_scales.py | 48 +++++---- hls4ml/model/optimizer/passes/nop.py | 18 ++-- .../passes/propagate_conv_precision.py | 38 ++++--- .../passes/propagate_dense_precision.py | 21 ++-- .../model/optimizer/passes/reshape_const.py | 35 ++++--- .../model/optimizer/passes/transpose_opt.py | 32 +++--- 10 files changed, 270 insertions(+), 177 deletions(-) diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index 533e4567a8..a01fb53314 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -1,14 +1,17 @@ import numpy as np +from hls4ml.model.layers import BatchNormalization, Conv1D, Conv2D, Dense from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.layers import BatchNormalization, Dense, Conv1D, Conv2D + class FuseBatchNormalization(OptimizerPass): def match(self, node): prev_node = node.get_input_node(node.inputs[0]) - basic_match = (isinstance(node, BatchNormalization) + basic_match = ( + isinstance(node, BatchNormalization) and isinstance(prev_node, (Dense, Conv1D, Conv2D)) - and not prev_node.get_attr("quant_precision")) + and not prev_node.get_attr("quant_precision") + ) if basic_match: s0 = prev_node.weights['weight'].data_unquantized @@ -16,31 +19,31 @@ def match(self, node): s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized scale_compatible = ( - (prev_node.get_attr("weight_quantizer") is None - and node.get_attr("scale_quantizer") is None) + (prev_node.get_attr("weight_quantizer") is None and node.get_attr("scale_quantizer") is None) or (s0 == np.ones_like(s0)).all() - or (s1 == np.ones_like(s1)).all()) + or (s1 == np.ones_like(s1)).all() + ) bias_compatible = ( - (prev_node.get_attr("bias_quantizer") is None - and node.get_attr("bias_quantizer") is None) + (prev_node.get_attr("bias_quantizer") is None and node.get_attr("bias_quantizer") is None) or (b0 == np.zeros_like(b0)).all() - or (b1 == np.zeros_like(b1)).all()) + or (b1 == np.zeros_like(b1)).all() + ) return scale_compatible and bias_compatible else: return False - def transform(self, model, node): - """ Fuse weight and bias of Dense/Conv1D/Conv2D layer with BN values - """ + """Fuse weight and bias of Dense/Conv1D/Conv2D layer with BN values""" parent_node = node.get_input_node() parent_map = parent_node.get_output_use_map() node_map = node.get_output_use_map() - if (len(parent_map.keys()) != 1 + if ( + len(parent_map.keys()) != 1 or len(tuple(parent_map.values())[0]) != 1 or len(node_map.keys()) != 1 - or len(tuple(node_map.values())[0]) > 1): + or len(tuple(node_map.values())[0]) > 1 + ): # This checks that output of both the parent and the current node # is used at most one time for this optimzation. (For the parent, of course it can't be 0) # JM: I understand the requirement on the parent, but not on the current node. 
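
The algebra this pass relies on can be checked numerically; the snippet below is only a sanity check of the folding (the shapes and values are made up, and the real pass additionally carries quantizers and precisions along):

    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.normal(size=(5, 3))
    W = rng.normal(size=(3, 4)); b0 = rng.normal(size=4)   # Dense kernel and bias
    s1 = rng.normal(size=4);     b1 = rng.normal(size=4)   # BatchNormalization scale and bias

    y_separate = (x @ W + b0) * s1 + b1       # Dense followed by BatchNormalization
    y_fused = x @ (W * s1) + (b0 * s1 + b1)   # single Dense with folded weight/bias
    assert np.allclose(y_separate, y_fused)
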
@@ -56,10 +59,12 @@ def transform(self, model, node): s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized - s_quantizer = (node.get_attr("scale_quantizer") if (s0 == np.ones_like(s0)).all() - else prev_node.get_attr("weight_quantizer")) - b_quantizer = (node.get_attr("bias_quantizer") if (b0 == np.zeros_like(b0)).all() - else prev_node.get_attr("bias_quantizer")) + s_quantizer = ( + node.get_attr("scale_quantizer") if (s0 == np.ones_like(s0)).all() else prev_node.get_attr("weight_quantizer") + ) + b_quantizer = ( + node.get_attr("bias_quantizer") if (b0 == np.zeros_like(b0)).all() else prev_node.get_attr("bias_quantizer") + ) prev_node.set_attr("weight_quantizer", s_quantizer) prev_node.set_attr("bias_quantizer", b_quantizer) diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index aba1cf9efd..db33e8c9b3 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -1,25 +1,50 @@ import numpy as np + +from hls4ml.model.layers import Constant, Conv, Conv1D, Conv2D from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.types import IntegerPrecisionType -from hls4ml.model.layers import Conv, Constant, Conv1D, Conv2D -_base_attributes = ('Trace', 'reuse_factor', 'in_width', 'out_width', 'n_chan', 'n_filt', 'pad_left', 'pad_right', - 'filt_width', 'stride_width', 'dilation_width', 'in_height', 'out_height', 'pad_top', 'pad_bottom', - 'filt_height', 'stride_height', 'dilation_height', 'strategy', 'data_format') +_base_attributes = ( + 'Trace', + 'reuse_factor', + 'in_width', + 'out_width', + 'n_chan', + 'n_filt', + 'pad_left', + 'pad_right', + 'filt_width', + 'stride_width', + 'dilation_width', + 'in_height', + 'out_height', + 'pad_top', + 'pad_bottom', + 'filt_height', + 'stride_height', + 'dilation_height', + 'strategy', + 'data_format', +) + class ConvToConvXD(OptimizerPass): - """ Convert Conv with constant to a Conv1D or Conv2D layer """ + """Convert Conv with constant to a Conv1D or Conv2D layer""" + def match(self, node): - is_match = (isinstance(node, Conv) - and ((len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant)) - or (len(node.inputs) == 3 - and isinstance(node.get_input_node(node.inputs[1]), Constant) - and isinstance(node.get_input_node(node.inputs[2]), Constant)))) + is_match = isinstance(node, Conv) and ( + (len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or ( + len(node.inputs) == 3 + and isinstance(node.get_input_node(node.inputs[1]), Constant) + and isinstance(node.get_input_node(node.inputs[2]), Constant) + ) + ) return is_match def transform(self, model, node): - """ Convert Conv with constant to a Conv1D or Conv2D layer """ + """Convert Conv with constant to a Conv1D or Conv2D layer""" weight_node = node.get_input_node(node.inputs[1]) weight_precision = weight_node.get_attr("quant_precision") @@ -29,7 +54,7 @@ def transform(self, model, node): bias_node = node.get_input_node(node.inputs[2]) bias_precision = bias_node.get_attr("quant_precision") - #creating the attributes + # creating the attributes attributes = {k: node.attributes.get(k, None) for k in _base_attributes} # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) @@ -40,21 +65,22 @@ def transform(self, model, node): newtype = Conv2D attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 3, 0)) attributes["weight_precision"] = 
weight_precision - attributes["weight_quantizer"] = weight_node.get_attr("quantizer") + attributes["weight_quantizer"] = weight_node.get_attr("quantizer") if bias_node: - attributes["bias_data"] = bias_node.value, - attributes["bias_precision"] = bias_precision, - attributes["bias_quantizer"] = bias_node.get_attr("quantizer") + attributes["bias_data"] = (bias_node.value,) + attributes["bias_precision"] = (bias_precision,) + attributes["bias_quantizer"] = bias_node.get_attr("quantizer") else: attributes["bias_data"] = np.zeros(attributes['n_filt']) attributes["bias_precision"] = IntegerPrecisionType(1, False) - #making new node - new_node = model.make_node(newtype, f"{newtype.__name__}_{node.name}", attributes, - [node.inputs[0]], [x for x in node.outputs]) + # making new node + new_node = model.make_node( + newtype, f"{newtype.__name__}_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs] + ) - #removing and replacing old nodes + # removing and replacing old nodes model.remove_node(weight_node, rewire=False) if bias_node: model.remove_node(bias_node, rewire=False) diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 98b77f2147..82c7b56313 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -1,53 +1,57 @@ import numpy as np + +from hls4ml.model.layers import Constant, Dense, MatMul from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.types import IntegerPrecisionType -from hls4ml.model.layers import MatMul, Constant, Dense _base_attributes = ('Trace', 'reuse_factor', 'weight', 'weight_t', 'bias', 'bias_t') + class MatmulConstToDense(OptimizerPass): """ Convert MatMul with constant to a dense layer. Note, this only supports the second input being the constant. If needed, one could add transposes to make that be the case in other yet to be written optimizers. 
""" + def match(self, node): - is_match = (isinstance(node, MatMul) and len(node.inputs) == 2 - and isinstance(node.get_input_node(node.inputs[1]), Constant)) + is_match = ( + isinstance(node, MatMul) and len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant) + ) return is_match def transform(self, model, node): - """ Substitute Matmul + Constant for a single dense """ - #determining Constant layer input + """Substitute Matmul + Constant for a single dense""" + # determining Constant layer input const_node = node.get_input_node(node.inputs[1]) - other_node = node.get_input_node(node.inputs[0]) other_var = node.get_input_variable(node.inputs[0]) weight_precision = const_node.get_attr("quant_precision") weight_quantizer = const_node.get_attr("quantizer") in_shape = other_var.shape - n_in = np.prod(in_shape) + n_in = np.prod(in_shape) out_shape = list(in_shape[:-1]) + [const_node.value.shape[-1]] n_out = np.prod(out_shape) - #creating the attributes + # creating the attributes attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({ - "weight_data": const_node.value, - "weight_precision": weight_precision, - "weight_quantizer": weight_quantizer, - "bias_data": np.zeros(out_shape), - "bias_precision": IntegerPrecisionType(1, False), - "n_in": n_in, - "n_out": n_out - }) - - #making new node - new_dense = model.make_node(Dense, f"Dense_{node.name}", attributes, - [node.inputs[0]], [x for x in node.outputs]) - - #removing and replacing old nodes + attributes.update( + { + "weight_data": const_node.value, + "weight_precision": weight_precision, + "weight_quantizer": weight_quantizer, + "bias_data": np.zeros(out_shape), + "bias_precision": IntegerPrecisionType(1, False), + "n_in": n_in, + "n_out": n_out, + } + ) + + # making new node + new_dense = model.make_node(Dense, f"Dense_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) + + # removing and replacing old nodes model.remove_node(const_node, rewire=False) model.replace_node(node, new_dense) diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 773507caa2..50c4e3cca3 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -1,18 +1,23 @@ import numpy as np -from hls4ml.model.layers import Merge, Constant, BatchNormalization -from hls4ml.model.optimizer import OptimizerPass + from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer +from hls4ml.model.layers import BatchNormalization, Constant, Merge +from hls4ml.model.optimizer import OptimizerPass _base_attributes = ('Trace', 'reuse_factor', 'n_in') -#TODO This doesn't yet support quantization in the constants +# TODO This doesn't yet support quantization in the constants + class MergeTwoConstants(OptimizerPass): - """ Merge of two constants makes another constant """ + """Merge of two constants makes another constant""" + def match(self, node): - is_match = (isinstance(node, Merge) - and isinstance(node.get_input_node(node.inputs[0]), Constant) - and isinstance(node.get_input_node(node.inputs[1]), Constant)) + is_match = ( + isinstance(node, Merge) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and isinstance(node.get_input_node(node.inputs[1]), Constant) + ) return is_match @@ -36,7 +41,7 @@ def transform(self, model, node): elif op == 'div': new_val = val0 / val1 elif op == 'average': - new_val = np.mean( np.array([val0, val1]), axis=0 ) + new_val = 
np.mean(np.array([val0, val1]), axis=0) elif op == 'max': new_val = np.maximum(val0, val1) elif op == 'min': @@ -63,13 +68,19 @@ def transform(self, model, node): return True + class MergeToBatchNormalization(OptimizerPass): - """ Convert Add, Sub, Mul, or Div Merges with consant to BatchNormalization """ + """Convert Add, Sub, Mul, or Div Merges with consant to BatchNormalization""" + def match(self, node): - is_match = (isinstance(node, Merge) - and node.attributes["op"] in ("add", "sum", "sub", "mul") # Div is separate - and (isinstance(node.get_input_node(node.inputs[0]), Constant) - != isinstance(node.get_input_node(node.inputs[1]), Constant))) + is_match = ( + isinstance(node, Merge) + and node.attributes["op"] in ("add", "sum", "sub", "mul") # Div is separate + and ( + isinstance(node.get_input_node(node.inputs[0]), Constant) + != isinstance(node.get_input_node(node.inputs[1]), Constant) + ) + ) # note: != for booleans is xor. return is_match @@ -122,37 +133,43 @@ def transform(self, model, node): scale_quantizer = const_node.get_attr("quantizer") attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({ - "scale_data": scale, - "bias_data": bias, - "n_in": n_in, - "n_out": n_in, - "n_filt": -1, - "scale_precision": scale_precision, - "scale_quantizer": scale_quantizer, - "bias_precision": bias_precision, - "bias_quantizer": bias_quantizer - }) - - bn_layer = model.make_node(BatchNormalization, f"bn_{node.name}", - attributes, - [node.inputs[input_node_idx]], [x for x in node.outputs]) + attributes.update( + { + "scale_data": scale, + "bias_data": bias, + "n_in": n_in, + "n_out": n_in, + "n_filt": -1, + "scale_precision": scale_precision, + "scale_quantizer": scale_quantizer, + "bias_precision": bias_precision, + "bias_quantizer": bias_quantizer, + } + ) + + bn_layer = model.make_node( + BatchNormalization, f"bn_{node.name}", attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] + ) model.remove_node(const_node, rewire=False) model.replace_node(node, bn_layer) return True + class MergeToBatchNormalizationDiv(OptimizerPass): """ Convert Div Merges with consant to BatchNormalization TODO: propagate precision """ + def match(self, node): - is_match = (isinstance(node, Merge) - and node.attributes["op"] == 'div' - and isinstance(node.get_input_node(node.inputs[1]), Constant)) # only second can be const + is_match = ( + isinstance(node, Merge) + and node.attributes["op"] == 'div' + and isinstance(node.get_input_node(node.inputs[1]), Constant) + ) # only second can be const return is_match @@ -160,22 +177,15 @@ def transform(self, model, node): input_shape = node.get_input_variable().shape n_in = np.prod(input_shape) const_node = node.get_input_node(node.inputs[1]) - scale = 1/const_node.value + scale = 1 / const_node.value bias = np.array(0) - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({ - "scale_data": scale, - "bias_data": bias, - "n_in": n_in, - "n_out": n_in, - "n_filt": -1 - }) - - bn_layer = model.make_node("BatchNormalization", f"bn_{node.name}", - attributes, - [node.inputs[0]], [x for x in node.outputs]) + attributes.update({"scale_data": scale, "bias_data": bias, "n_in": n_in, "n_out": n_in, "n_filt": -1}) + + bn_layer = model.make_node( + "BatchNormalization", f"bn_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs] + ) model.remove_node(const_node, rewire=False) model.replace_node(node, bn_layer) diff --git a/hls4ml/model/optimizer/passes/move_scales.py 
b/hls4ml/model/optimizer/passes/move_scales.py index 7823ca97ba..1b30ea1d17 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -4,20 +4,28 @@ TODO: Check that biases are properly handled. (Attempt to do it via Merge) ''' -from copy import deepcopy import numpy as np -from hls4ml.model.layers import ApplyAlpha, Constant, MatMul, Conv, Merge -from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer + +from hls4ml.model.layers import ApplyAlpha, Constant, Conv, MatMul, Merge from hls4ml.model.optimizer import OptimizerPass + class ScaleDownMatMul(OptimizerPass): '''Shift an ApplyAlpha below a MatMul''' def match(self, node): - '''Check to see if we have a MatMul with at least one input ApplyAlpha. Note, if both are this optimition runs twice''' - is_match = (isinstance(node, MatMul) and len(node.inputs) == 2 - and (isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) - or isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha))) + ''' + Check to see if we have a MatMul with at least one input ApplyAlpha. + Note, if both are this optimition runs twice. + ''' + is_match = ( + isinstance(node, MatMul) + and len(node.inputs) == 2 + and ( + isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) + or isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) + ) + ) return is_match def transform(self, model, node): @@ -94,18 +102,21 @@ def transform(self, model, node): model.insert_node(new_node) return True + class ScaleDownAdd(OptimizerPass): '''Shift an identical ApplyAlpha below a Merge (Add)''' def match(self, node): '''Check to see if we have an add with two ApplyAlphas with identical scale''' - is_match = (isinstance(node, Merge) and len(node.inputs) == 2 - and node.attributes["op"] == "add") + is_match = isinstance(node, Merge) and len(node.inputs) == 2 and node.attributes["op"] == "add" if is_match: in0 = node.get_input_node(node.inputs[0]) in1 = node.get_input_node(node.inputs[1]) - is_match = (isinstance(in0, ApplyAlpha) and isinstance(in1, ApplyAlpha) - and (in0.weights['scale'].data_unquantized == in1.weights['scale'].data_unquantized).all()) + is_match = ( + isinstance(in0, ApplyAlpha) + and isinstance(in1, ApplyAlpha) + and (in0.weights['scale'].data_unquantized == in1.weights['scale'].data_unquantized).all() + ) return is_match def transform(self, model, node): @@ -137,8 +148,7 @@ class ScaleDownConv(OptimizerPass): def match(self, node): '''Shift an ApplyAlpha from the Weight''' - is_match = (isinstance(node, Conv) - and isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha)) + is_match = isinstance(node, Conv) and isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) return is_match @@ -185,13 +195,15 @@ def transform(self, model, node): model.insert_node(new_node) return True + class ScaleDownWeightConv(OptimizerPass): '''Shift an ApplyAlpha weight (from conv side) below a Conv''' def match(self, node): '''Shift an ApplyAlpha from the Weight''' - is_match = (isinstance(node, Conv) and len(node.inputs) > 1 - and isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha)) + is_match = ( + isinstance(node, Conv) and len(node.inputs) > 1 and isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) + ) return is_match @@ -238,13 +250,15 @@ def transform(self, model, node): model.insert_node(new_node) return True + class ScaleDownBiasConv(OptimizerPass): '''Shift an ApplyAlpha bias (from conv side) below a Conv''' def match(self, node): '''Shift an ApplyAlpha from the Weight''' - 
is_match = (isinstance(node, Conv) and len(node.inputs) > 2 - and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha)) + is_match = ( + isinstance(node, Conv) and len(node.inputs) > 2 and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha) + ) return is_match diff --git a/hls4ml/model/optimizer/passes/nop.py b/hls4ml/model/optimizer/passes/nop.py index 202e857ce2..bdf869cb7a 100644 --- a/hls4ml/model/optimizer/passes/nop.py +++ b/hls4ml/model/optimizer/passes/nop.py @@ -1,5 +1,6 @@ -from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.layers import Activation +from hls4ml.model.optimizer import OptimizerPass + class EliminateLinearActivation(OptimizerPass): def match(self, node): @@ -9,27 +10,32 @@ def match(self, node): return node.get_attr('activation') == 'linear' and not cast else: return False - + def transform(self, model, node): model.remove_node(node) return True + class EliminateLinearActivationQuant(OptimizerPass): ''' This is to optimize away lots of linear qantizations in QONNX. May have to restrict it more if it causes problems. ''' + def match(self, node): ''' Only match if this activation is from quant node and previous node precision is not set by a quant node already. ''' - is_match = (isinstance(node, Activation) and node.get_attr('activation') == 'linear' - and node.get_attr("quant_precision") - and not node.get_input_node(node.inputs[0]).get_attr("quant_precision")) + is_match = ( + isinstance(node, Activation) + and node.get_attr('activation') == 'linear' + and node.get_attr("quant_precision") + and not node.get_input_node(node.inputs[0]).get_attr("quant_precision") + ) return is_match def transform(self, model, node): - prev_node = node.get_input_node(node.inputs[0]); + prev_node = node.get_input_node(node.inputs[0]) quant_precision = node.get_attr("quant_precision") prev_node.set_attr("quant_precision", quant_precision) prev_node.set_attr("quantizer", node.get_attr("quantizer")) diff --git a/hls4ml/model/optimizer/passes/propagate_conv_precision.py b/hls4ml/model/optimizer/passes/propagate_conv_precision.py index 44e14b550a..ed9c96476e 100644 --- a/hls4ml/model/optimizer/passes/propagate_conv_precision.py +++ b/hls4ml/model/optimizer/passes/propagate_conv_precision.py @@ -1,14 +1,17 @@ -import numpy as np import math # prefer to use math.ceil for scalar values (returns int) -from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import NamedType, FixedPrecisionType + +import numpy as np + from hls4ml.model.layers import Conv1D, Conv2D -from numbers import Integral +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import FixedPrecisionType, NamedType + class PropagateConvPrecision(OptimizerPass): - """ Propagate precision for conv nodes. Restrict it to only cases where + """Propagate precision for conv nodes. Restrict it to only cases where the precision is set by a quant node, since otherwise the values get huge. 
""" + def match(self, node): is_match = isinstance(node, (Conv1D, Conv2D)) return is_match @@ -26,11 +29,15 @@ def transform(self, model, node): filt_height = node.get_attr('filt_height', 1) accum_precision = _propagate_type_conv( - input_precision, weight_precision, bias_precision, - num_feature_maps=num_feature_maps, filt_width=filt_width, - filt_height=filt_height) + input_precision, + weight_precision, + bias_precision, + num_feature_maps=num_feature_maps, + filt_width=filt_width, + filt_height=filt_height, + ) - accum_t = NamedType('layer{}_accum_t'.format(node.index), accum_precision) + accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) node.set_attr('accum_t', accum_t) if not node.get_attr("quant_precision"): @@ -39,8 +46,8 @@ def transform(self, model, node): return False -def _propagate_type_conv(input_precision, weight_precision, bias_precision, - num_feature_maps, filt_width, filt_height): + +def _propagate_type_conv(input_precision, weight_precision, bias_precision, num_feature_maps, filt_width, filt_height): ''' Propagate the precion type across a multiply. Rounding modes are propagated from input_precision ''' @@ -58,8 +65,13 @@ def _propagate_type_conv(input_precision, weight_precision, bias_precision, # correct for bias if bias_precision: - integer = max(integer + (bias_precision.signed and not signed), - bias_precision.integer + (signed and not bias_precision.signed)) + 1 + integer = ( + max( + integer + (bias_precision.signed and not signed), + bias_precision.integer + (signed and not bias_precision.signed), + ) + + 1 + ) bitwidth = integer + max(frac, bias_precision.width - bias_precision.integer) signed = signed or bias_precision.signed diff --git a/hls4ml/model/optimizer/passes/propagate_dense_precision.py b/hls4ml/model/optimizer/passes/propagate_dense_precision.py index 867da50d2b..39b873b951 100644 --- a/hls4ml/model/optimizer/passes/propagate_dense_precision.py +++ b/hls4ml/model/optimizer/passes/propagate_dense_precision.py @@ -1,8 +1,10 @@ -import numpy as np import math # prefer to use math.ceil for scalar values (returns int) -from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import NamedType, FixedPrecisionType + +import numpy as np + from hls4ml.model.layers import Dense +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import FixedPrecisionType, NamedType class PropagateDensePrecision(OptimizerPass): @@ -10,6 +12,7 @@ class PropagateDensePrecision(OptimizerPass): Propagate precision for Dense nodes. Restrict it to only cases where the precision is set by a quant node, since otherwise the values get huge. """ + def match(self, node): is_match = isinstance(node, Dense) return is_match @@ -27,7 +30,7 @@ def transform(self, model, node): accum_precision = _propagate_type_dense(input_precision, weight_precision, bias_precision, num_acc) - accum_t = NamedType('layer{}_accum_t'.format(node.index), accum_precision) + accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) node.set_attr('accum_t', accum_t) if not node.get_attr("quant_precision"): @@ -36,6 +39,7 @@ def transform(self, model, node): return False + def _propagate_type_dense(input_precision, weight_precision, bias_precision, num_acc): ''' Propagate the precion type across a multiply. 
Rounding modes are propagated from input_precision @@ -54,8 +58,13 @@ def _propagate_type_dense(input_precision, weight_precision, bias_precision, num # correct for bias if bias_precision: - integer = max(integer + (bias_precision.signed and not signed), - bias_precision.integer + (signed and not bias_precision.signed)) + 1 + integer = ( + max( + integer + (bias_precision.signed and not signed), + bias_precision.integer + (signed and not bias_precision.signed), + ) + + 1 + ) bitwidth = integer + max(frac, bias_precision.width - bias_precision.integer) signed = signed or bias_precision.signed diff --git a/hls4ml/model/optimizer/passes/reshape_const.py b/hls4ml/model/optimizer/passes/reshape_const.py index 71aab4e4ec..06a279ca9a 100644 --- a/hls4ml/model/optimizer/passes/reshape_const.py +++ b/hls4ml/model/optimizer/passes/reshape_const.py @@ -1,13 +1,14 @@ import numpy as np -from hls4ml.model.optimizer import OptimizerPass + from hls4ml.model.layers import Constant, Reshape +from hls4ml.model.optimizer import OptimizerPass + class ReshapeConstant(OptimizerPass): - """ Remove Constant from new shape input """ + """Remove Constant from new shape input""" + def match(self, node): - is_match = (isinstance(node, Reshape) - and len(node.inputs) > 1 - and node.get_input_node(node.inputs[1])) + is_match = isinstance(node, Reshape) and len(node.inputs) > 1 and node.get_input_node(node.inputs[1]) return is_match @@ -15,22 +16,25 @@ def transform(self, model, node): """ Remove Constant from new shape input. Note, input shape node is already used on initialize """ - shape_node = node.get_input_node(node.inputs[1]) + shape_node = node.get_input_node(node.inputs[1]) node.inputs[1] = '' if not isinstance(shape_node, Constant): - raise "Nonconstant shape inputs are not currently supported" + raise RuntimeError("Nonconstant shape inputs are not currently supported") model.remove_node(shape_node, rewire=False) return True + class ReshapeConstantFusion(OptimizerPass): - """ Remove Constant from new shape input """ + """Remove Constant from new shape input""" + def match(self, node): - is_match = (isinstance(node, Reshape) - and len(node.inputs) >= 0 - and isinstance(node.get_input_node(node.inputs[0]), Constant) - and (len(node.inputs) == 1 - or not node.get_input_node(node.inputs[1]))) + is_match = ( + isinstance(node, Reshape) + and len(node.inputs) >= 0 + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and (len(node.inputs) == 1 or not node.get_input_node(node.inputs[1])) + ) return is_match @@ -38,14 +42,13 @@ def transform(self, model, node): """ Change the shape of the constant """ - const_node = node.get_input_node(node.inputs[0]) + const_node = node.get_input_node(node.inputs[0]) target_shape = node.get_attr('target_shape') new_val = np.reshape(const_node.value, target_shape) const_node.set_attr('value', new_val) const_node.value = new_val dims = [f'{const_node.name}_{i}' for i in range(len(target_shape))] - self.add_output_variable(target_shape, dims, var_name=const_node.name, - precision=const_node.get_attr("precision")) + self.add_output_variable(target_shape, dims, var_name=const_node.name, precision=const_node.get_attr("precision")) model.remove_node(node, rewire=True) return True diff --git a/hls4ml/model/optimizer/passes/transpose_opt.py b/hls4ml/model/optimizer/passes/transpose_opt.py index f5e7c0dceb..06f8113669 100644 --- a/hls4ml/model/optimizer/passes/transpose_opt.py +++ b/hls4ml/model/optimizer/passes/transpose_opt.py @@ -1,33 +1,38 @@ import numpy as np + +from 
hls4ml.model.layers import Constant, Transpose from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.layers import Transpose, Constant + class RemoveUselessTranspose(OptimizerPass): def match(self, node): - is_match = isinstance(node, Transpose) and\ - list(node.get_attr('perm')) == [0] #Useless transpose + is_match = isinstance(node, Transpose) and list(node.get_attr('perm')) == [0] # Useless transpose return is_match def transform(self, model, node): """ Remove a transpose layer if it doesn't do anything. i.e 1D input and perm = [0] """ - print("Unnessary {} in the model, optimizing ...".format(node.name)) + print(f"Unnessary {node.name} in the model, optimizing ...") if not node.get_output_nodes(): - print("WARNING: {} is the output layer! No rewiring performed.".format(node.name)) - model.remove_node(node, rewire=False) #Don't rewire if there is no output layer + print(f"WARNING: {node.name} is the output layer! No rewiring performed.") + model.remove_node(node, rewire=False) # Don't rewire if there is no output layer else: model.remove_node(node, rewire=True) return True + class TransposeConstantFusion(OptimizerPass): - """ Remove Constant from new shape input """ + """Remove Constant from new shape input""" + def match(self, node): - is_match = (isinstance(node, Transpose) - and len(node.inputs) >= 0 - and isinstance(node.get_input_node(node.inputs[0]), Constant) - and list(node.get_attr('perm')) != [0]) + is_match = ( + isinstance(node, Transpose) + and len(node.inputs) >= 0 + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and list(node.get_attr('perm')) != [0] + ) return is_match @@ -41,8 +46,7 @@ def transform(self, model, node): const_node.set_attr('value', new_val) const_node.value = new_val dims = [f'{const_node.name}_{i}' for i in range(len(perm))] - self.add_output_variable(new_val.shape, dims, var_name=const_node.name, - precision=const_node.get_attr("precision")) + self.add_output_variable(new_val.shape, dims, var_name=const_node.name, precision=const_node.get_attr("precision")) model.remove_node(node, rewire=True) - return True \ No newline at end of file + return True From 005fad65d0bd5ec3b670e8df4ccc977f897c701b Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 27 Jan 2023 09:31:26 -0600 Subject: [PATCH 47/51] backend pre-commit fixes --- .../fpga/passes/inplace_parallel_reshape.py | 4 +++- .../quartus/passes/transform_types.py | 19 ++++++++++++------- hls4ml/backends/template.py | 17 +++++++++-------- .../backends/vivado/passes/transform_types.py | 17 ++++++++++------- 4 files changed, 34 insertions(+), 23 deletions(-) diff --git a/hls4ml/backends/fpga/passes/inplace_parallel_reshape.py b/hls4ml/backends/fpga/passes/inplace_parallel_reshape.py index fd5cf45be7..e202ee8b00 100644 --- a/hls4ml/backends/fpga/passes/inplace_parallel_reshape.py +++ b/hls4ml/backends/fpga/passes/inplace_parallel_reshape.py @@ -1,11 +1,13 @@ -from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.layers import Reshape +from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.types import InplaceTensorVariable + class InplaceParallelReshape(OptimizerPass): """ Because in io_parallel arrays are stored 1D, reshape produces no code """ + def match(self, node): return isinstance(node, Reshape) diff --git a/hls4ml/backends/quartus/passes/transform_types.py b/hls4ml/backends/quartus/passes/transform_types.py index 8623501860..67de32ab65 100644 --- a/hls4ml/backends/quartus/passes/transform_types.py +++ 
b/hls4ml/backends/quartus/passes/transform_types.py @@ -1,12 +1,17 @@ - -from numpy import isin -from hls4ml.model.optimizer import GlobalOptimizerPass from hls4ml.backends.fpga.fpga_types import ( - ACTypeConverter, HLSTypeConverter, QuartusArrayVariableConverter, QuartusInplaceArrayVariableConverter, - QuartusStructMemberVariableConverter, StaticWeightVariableConverter, - QuartusStreamVariableConverter, QuartusInplaceStreamVariableConverter) + ACTypeConverter, + HLSTypeConverter, + QuartusArrayVariableConverter, + QuartusInplaceArrayVariableConverter, + QuartusInplaceStreamVariableConverter, + QuartusStreamVariableConverter, + QuartusStructMemberVariableConverter, + StaticWeightVariableConverter, +) +from hls4ml.model.optimizer import GlobalOptimizerPass from hls4ml.model.types import InplaceTensorVariable + class TransformTypes(GlobalOptimizerPass): def __init__(self): self.type_converter = HLSTypeConverter(precision_converter=ACTypeConverter()) @@ -36,7 +41,7 @@ def transform(self, model, node): else: new_var = self.array_var_converter.convert(var, pragma='hls_register') else: - raise Exception('Unknown IOType {} in {} ({})'.format(io_type, node.name, node.class_name)) + raise Exception(f'Unknown IOType {io_type} in {node.name} ({node.class_name})') node.set_attr(out_name, new_var) diff --git a/hls4ml/backends/template.py b/hls4ml/backends/template.py index 569e84d7ae..5cc749e012 100644 --- a/hls4ml/backends/template.py +++ b/hls4ml/backends/template.py @@ -1,4 +1,3 @@ - from hls4ml.model.optimizer.optimizer import OptimizerPass @@ -9,7 +8,7 @@ def __init__(self, name, layer_class, attribute_name): if not isinstance(self.layer_class, (list, tuple, set)): self.layer_class = [self.layer_class] self.attribute_name = attribute_name - + def match(self, node): for layer_cls in self.layer_class: if node.class_name == layer_cls.__name__: @@ -20,13 +19,14 @@ def transform(self, model, node): formatted_template = self.format(node) node.set_attr(self.attribute_name, formatted_template) return False - + def format(self, node): raise NotImplementedError def get_name(self): return self.name - + + class LayerConfigTemplate(Template): def __init__(self, layer_class): if isinstance(layer_class, (list, tuple, set)): @@ -35,7 +35,7 @@ def __init__(self, layer_class): name = layer_class.__name__.lower() name += '_config_template' super().__init__(name, layer_class, 'config_cpp') - + def _default_config_params(self, layer): params = {} params.update(layer.attributes) @@ -44,8 +44,9 @@ def _default_config_params(self, layer): return params + class FunctionCallTemplate(Template): - def __init__(self, layer_class, include_header=[]): + def __init__(self, layer_class, include_header=()): if isinstance(layer_class, (list, tuple, set)): name = '_'.join([cls.__name__.lower() for cls in layer_class]) else: @@ -53,11 +54,11 @@ def __init__(self, layer_class, include_header=[]): name += '_function_template' super().__init__(name, layer_class, 'function_cpp') self.include_header = include_header - + def _default_function_params(self, layer): params = {} params.update(layer.attributes) - params['config'] = 'config{}'.format(layer.index) + params['config'] = f'config{layer.index}' params['input_t'] = layer.get_input_variable().type.name params['output_t'] = layer.get_output_variable().type.name params['input'] = layer.get_input_variable().name diff --git a/hls4ml/backends/vivado/passes/transform_types.py b/hls4ml/backends/vivado/passes/transform_types.py index 3b5bdf7da4..fac127de03 100644 --- 
a/hls4ml/backends/vivado/passes/transform_types.py +++ b/hls4ml/backends/vivado/passes/transform_types.py @@ -1,10 +1,13 @@ - -from numpy import isin -from hls4ml.model.optimizer import GlobalOptimizerPass from hls4ml.backends.fpga.fpga_types import ( - APTypeConverter, HLSTypeConverter, StaticWeightVariableConverter, - VivadoArrayVariableConverter, VivadoInplaceArrayVariableConverter, - VivadoStreamVariableConverter, VivadoInplaceStreamVariableConverter) + APTypeConverter, + HLSTypeConverter, + StaticWeightVariableConverter, + VivadoArrayVariableConverter, + VivadoInplaceArrayVariableConverter, + VivadoInplaceStreamVariableConverter, + VivadoStreamVariableConverter, +) +from hls4ml.model.optimizer import GlobalOptimizerPass from hls4ml.model.types import InplaceTensorVariable @@ -36,7 +39,7 @@ def transform(self, model, node): else: new_var = self.array_var_converter.convert(var, pragma='partition') else: - raise Exception('Unknown IOType {} in {} ({})'.format(io_type, node.name, node.__class__.__name__)) + raise Exception(f'Unknown IOType {io_type} in {node.name} ({node.__class__.__name__})') node.set_attr(out_name, new_var) From a79b9261f2f2ebc1109d1c224e7df1314c8aee39 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 27 Jan 2023 09:42:30 -0600 Subject: [PATCH 48/51] model pre-commit fixes --- hls4ml/model/optimizer/__init__.py | 45 ++++-- hls4ml/model/optimizer/passes/quant_opt.py | 170 ++++++++++----------- hls4ml/model/types.py | 68 ++++++--- 3 files changed, 159 insertions(+), 124 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 4a1e63d499..7ee0443985 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -1,9 +1,23 @@ -from email.mime import base -from hls4ml.model.flow.flow import register_flow import os -from hls4ml.model.optimizer.optimizer import OptimizerPass, GlobalOptimizerPass, LayerOptimizerPass, ModelOptimizerPass, ConfigurableOptimizerPass, register_pass, get_optimizer, optimize_model, get_available_passes, get_backend_passes, optimizer_pass, layer_optimizer, model_optimizer, extract_optimizers_from_path, extract_optimizers_from_object - +from hls4ml.model.flow.flow import register_flow +from hls4ml.model.optimizer.optimizer import ( # noqa: F401 + ConfigurableOptimizerPass, + GlobalOptimizerPass, + LayerOptimizerPass, + ModelOptimizerPass, + OptimizerPass, + extract_optimizers_from_object, + extract_optimizers_from_path, + get_available_passes, + get_backend_passes, + get_optimizer, + layer_optimizer, + model_optimizer, + optimize_model, + optimizer_pass, + register_pass, +) opt_path = os.path.dirname(__file__) + '/passes' module_path = __name__ + '.passes' @@ -45,18 +59,25 @@ 'eliminate_linear_activation', 'propagate_dense_precision', 'propagate_conv_precision', - 'set_precision_concat' + 'set_precision_concat', ] try: - import qkeras + import qkeras # noqa: F401 + # TODO Maybe not all QKeras optmizers belong here? 
- register_flow('convert', base_convert - + ['output_rounding_saturation_mode', 'qkeras_factorize_alpha', - 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization']) - register_flow('optimize', ['fuse_consecutive_batch_normalization'] + base_optimize, - requires=['convert']) -except: + register_flow( + 'convert', + base_convert + + [ + 'output_rounding_saturation_mode', + 'qkeras_factorize_alpha', + 'extract_ternary_threshold', + 'fuse_consecutive_batch_normalization', + ], + ) + register_flow('optimize', ['fuse_consecutive_batch_normalization'] + base_optimize, requires=['convert']) +except ImportError: register_flow('convert', base_convert) register_flow('optimize', base_optimize, requires=['convert']) diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index 127368809c..f0a5129d52 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -3,40 +3,42 @@ As a first step, QuantConstantParameters converts the extra inputs to attributes. It is always the first step -The next step differs between the case of (1) unitary scale and zero offset, or (2) nonunitary scale and/or nonzero offset. -In the first case no scaling is required, so a Quant node effectively becomes a linear activation. For the common case when this -is applied on a constant weight, the activation is immediately merged with the weight, qantizing the weights. In case 2, -we need to explictly scale and unscale, so the Quant node becomes 3 nodes, an ApplyAlpha node to apply a scale/shift, a -Linear node to apply the quantization, and another ApplyAlpha to unscale/shift. We depend on optimization steps to move the -unscaling ApplyAlpha down as needed. Again, when the Quant is a applied ot a Constant, the scaling and Linear nodes are -immediately merged into the Constant. This is done because it simplifies some of the other optimizations. - -UPDATE: Case 1 is loosened to also include power of 2 scalar scales, not just unitary scale, if +The next step differs between the case of (1) unitary scale and zero offset, or (2) nonunitary scale and/or +nonzero offset. In the first case no scaling is required, so a Quant node effectively becomes a linear activation. +For the common case when this is applied on a constant weight, the activation is immediately merged with the weight, +qantizing the weights. In case 2, we need to explictly scale and unscale, so the Quant node becomes 3 nodes, an +ApplyAlpha node to apply a scale/shift, a Linear node to apply the quantization, and another ApplyAlpha to unscale/shift. +We depend on optimization steps to move the unscaling ApplyAlpha down as needed. Again, when the Quant is a applied to a +Constant, the scaling and Linear nodes are immediately merged into the Constant. This is done because it simplifies some +of the other optimizations. 
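As a minimal numpy sketch of what case 2 computes end to end (illustrative only: the names below are not from the pass, and the real QuantNodeQuantizer additionally handles the rounding mode and narrow range):

    import numpy as np

    def quant_reference(x, scale, zeropt, bitwidth=8, signed=True):
        # scale/shift onto the integer grid, round and clip, then scale/shift back out
        q = np.round(x / scale + zeropt)
        lo, hi = (-2 ** (bitwidth - 1), 2 ** (bitwidth - 1) - 1) if signed else (0, 2**bitwidth - 1)
        q = np.clip(q, lo, hi)
        return scale * (q - zeropt)

    x = np.array([0.3, -1.7, 2.5])
    scale, zeropt = 0.25, 0.0
    # ApplyAlpha(1/scale, +zeropt) -> Linear quantization -> ApplyAlpha(scale, -zeropt*scale)
    staged = scale * (np.clip(np.round(x / scale + zeropt), -128, 127) - zeropt)
    assert np.allclose(staged, quant_reference(x, scale, zeropt))

The three nodes therefore reproduce y = scale * (round_clip(x / scale + zeropt) - zeropt), which is what lets later passes move the unscaling ApplyAlpha further down the graph.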
+ +UPDATE: Case 1 is loosened to also include power of 2 scalar scales, not just unitary scale, if _ALSO_INCLUDE_PO2 is set to true (the default) ''' -from copy import deepcopy -import numpy as np import math # prefer to use math.ceil for scalar values -from hls4ml.model.types import FixedPrecisionType -from hls4ml.model.layers import Quant, Constant, Activation, ApplyAlpha + +import numpy as np + from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer +from hls4ml.model.layers import Activation, ApplyAlpha, Constant, Quant from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import FixedPrecisionType _ALSO_MATCH_PO2 = True _base_attributes = ('Trace', 'reuse_factor') + class QuantConstantParameters(OptimizerPass): - """ Remove Constant from the Qaunt node parameters (but not input[0]) """ + """Remove Constant from the Qaunt node parameters (but not input[0])""" + def match(self, node): - is_match = (isinstance(node, Quant) - and ((node.get_input_node(node.inputs[1]) - and isinstance(node.get_input_node(node.inputs[1]), Constant)) - or (node.get_input_node(node.inputs[2]) - and isinstance(node.get_input_node(node.inputs[2]), Constant)) - or (node.get_input_node(node.inputs[3]) - and isinstance(node.get_input_node(node.inputs[3]), Constant)))) + is_match = isinstance(node, Quant) and ( + (node.get_input_node(node.inputs[1]) and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or (node.get_input_node(node.inputs[2]) and isinstance(node.get_input_node(node.inputs[2]), Constant)) + or (node.get_input_node(node.inputs[3]) and isinstance(node.get_input_node(node.inputs[3]), Constant)) + ) return is_match @@ -79,17 +81,20 @@ class QuantToActivation(OptimizerPass): UPDATE: this is also called when scale is scalar and power of 2, not just 1. 
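(The power-of-two loosening works because a scalar scale such as 2**-3 only moves the binary point three places, which a fixed-point type can absorb exactly, so no explicit rescaling multiplier is needed in that case.)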
''' + def match(self, node): # only matches after the other inputs are already folded - is_match = (isinstance(node, Quant) - and not isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3])) + is_match = ( + isinstance(node, Quant) + and not isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3]) + ) # Only match if the scale is 1s and the zero-point is 0s - if is_match: # to make sure this is a quant node with inputs + if is_match: # to make sure this is a quant node with inputs scale = node.get_attr("scale") bias = node.get_attr("zeropt") is_match = is_match and (bias == np.zeros_like(bias)).all() @@ -128,15 +133,9 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({ - 'activation' : 'linear', - 'quant_precision' : precision, - 'quantizer' : quantizer, - 'n_in' : n_in - }) - - new_node = model.make_node(Activation, f'{node.name}_act', - attributes, [node.inputs[0]], [x for x in node.outputs]) + attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer, 'n_in': n_in}) + + new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) new_node.get_output_variable().type.precision = precision model.replace_node(node, new_node) @@ -148,17 +147,19 @@ class FuseQuantWithConstant(OptimizerPass): This is for the case when scale is 1 and zeropt is 0. It directly applies the quantization to a constant. UPDATE: this is also called when scale is scalar and power of 2, not just 1. ''' + def match(self, node): # only matches after the other inputs are already folded - is_match = (isinstance(node, Quant) - and isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3])) + is_match = ( + isinstance(node, Quant) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3]) + ) # Only match if the scale is 1s and the zero-point is 0s - if is_match: # to make sure this is a quant node with inputs - input_shape = node.get_input_variable().shape + if is_match: # to make sure this is a quant node with inputs scale = node.get_attr("scale") bias = node.get_attr("zeropt") is_match = is_match and (bias == np.zeros_like(bias)).all() @@ -213,16 +214,18 @@ class QuantToAlphaActivationAlpha(OptimizerPass): As an optimization, this is not called when the input is constant. 
''' + def match(self, node): # only matches after the other inputs are already folded - is_match = (isinstance(node, Quant) - and not isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3])) - - if is_match: # to make sure this is a quant node with inputs - input_shape = node.get_input_variable().shape + is_match = ( + isinstance(node, Quant) + and not isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3]) + ) + + if is_match: # to make sure this is a quant node with inputs scale = node.get_attr("scale") bias = node.get_attr("zeropt") is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any()) @@ -247,15 +250,9 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({ - 'activation' : 'linear', - 'quant_precision' : precision, - 'quantizer' : quantizer, - 'n_in' : n_in - }) - - new_node = model.make_node(Activation, f'{node.name}_act', - attributes, [node.inputs[0]], [x for x in node.outputs]) + attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer, 'n_in': n_in}) + + new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) new_node.get_output_variable().type.precision = precision model.replace_node(node, new_node) @@ -265,20 +262,12 @@ def transform(self, model, node): bias = node.get_attr("zeropt") attributes_scale = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes_scale.update({ - 'n_in': n_in, - 'n_out': n_in, - 'n_filt': -1 - }) + attributes_scale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes_rescale.update({ - 'n_in': n_in, - 'n_out': n_in, - 'n_filt': -1 - }) + attributes_rescale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) - firstscale = 1/scale + firstscale = 1 / scale firstbias = bias attributes_scale["scale_data"] = firstscale attributes_scale["bias_data"] = firstbias @@ -287,7 +276,7 @@ def transform(self, model, node): model.insert_node(scale_node) rescale = scale - rebias = -bias*scale + rebias = -bias * scale attributes_rescale["scale_data"] = rescale attributes_rescale["bias_data"] = rebias @@ -304,16 +293,18 @@ class ConstQuantToConstAlpha(OptimizerPass): consts allows for optimization, so the ApplyAlpha (to scale), Activation are optimized away right away. 
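(Illustrative arithmetic: the folded constant ends up holding the integer-grid values q, and the one remaining ApplyAlpha applies rescale = scale and rebias = -zeropt * scale, i.e. y = scale * (q - zeropt), which is the dequantized constant.)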
''' + def match(self, node): # only matches after the other inputs are already folded - is_match = (isinstance(node, Quant) - and isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3])) - - if is_match: # to make sure this is a quant node with inputs - input_shape = node.get_input_variable().shape + is_match = ( + isinstance(node, Quant) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3]) + ) + + if is_match: # to make sure this is a quant node with inputs scale = node.get_attr("scale") bias = node.get_attr("zeropt") is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any()) @@ -352,19 +343,16 @@ def transform(self, model, node): const_node.initialize() attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes_rescale.update({ - 'n_in': n_in, - 'n_out': n_in, - 'n_filt': -1 - }) + attributes_rescale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) rescale = scale - rebias = -bias*scale + rebias = -bias * scale attributes_rescale["scale_data"] = rescale attributes_rescale["bias_data"] = rebias - rescale_node = model.make_node(ApplyAlpha, node.name + '_rescale', attributes_rescale, - [x for x in node.inputs], [x for x in node.outputs]) + rescale_node = model.make_node( + ApplyAlpha, node.name + '_rescale', attributes_rescale, [x for x in node.inputs], [x for x in node.outputs] + ) model.replace_node(node, rescale_node) return True @@ -377,9 +365,11 @@ def _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_m if rounding_mode == "ROUND": bn_round = "AP_RND_CONV" elif rounding_mode == "FLOOR": - bn_round = "AP_TRN" + bn_round = "AP_TRN" else: - raise NotImplementedError(f"Rounding mode {rounding_mode} not supported in Quant node. Only ROUND and FLOOR supported.") + raise NotImplementedError( + f"Rounding mode {rounding_mode} not supported in Quant node. Only ROUND and FLOOR supported." 
+ ) if narrow and not signed: raise NotImplementedError("Narrow mode is only supported for singed numbers.") diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index 20777d2d20..84fa6eff1e 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -1,9 +1,10 @@ -from enum import Enum import re +from enum import Enum + import numpy as np -class Quantizer(object): +class Quantizer: def __init__(self, bits, hls_type): self.bits = bits self.hls_type = hls_type @@ -11,6 +12,7 @@ def __init__(self, bits, hls_type): def __call__(self, data): raise NotImplementedError + class RoundingMode(Enum): TRN = 1 TRN_ZERO = 2 @@ -30,6 +32,7 @@ def from_string(cls, mode): return cls[mode] + class SaturationMode(Enum): WRAP = 1 SAT = 2 @@ -46,11 +49,13 @@ def from_string(cls, mode): return cls[mode] -class PrecisionType(object): + +class PrecisionType: def __init__(self, width, signed): self.width = width self.signed = signed + class IntegerPrecisionType(PrecisionType): def __init__(self, width=16, signed=True): super().__init__(width=width, signed=signed) @@ -69,11 +74,12 @@ def __eq__(self, other): eq = eq and self.fractional == other.fractional return eq + class FixedPrecisionType(PrecisionType): def __init__(self, width=16, integer=6, signed=True, rounding_mode=None, saturation_mode=None, saturation_bits=None): super().__init__(width=width, signed=signed) self.integer = integer - self.fractional = width-integer + self.fractional = width - integer self.rounding_mode = rounding_mode self.saturation_mode = saturation_mode self.saturation_bits = saturation_bits @@ -116,34 +122,40 @@ def __eq__(self, other): eq = eq and self.saturation_bits == other.saturation_bits return eq + class XnorPrecisionType(IntegerPrecisionType): ''' Convenience class to differentiate 'regular' integers from BNN Xnor ones ''' + def __init__(self): super().__init__(width=1, signed=False) + class ExponentPrecisionType(IntegerPrecisionType): ''' - Convenience class to differentiate 'regular' integers from those which represent exponents, for QKeras po2 quantizers, for example. + Convenience class to differentiate 'regular' integers from those which represent exponents, + for QKeras po2 quantizers, for example. 
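(For example, a po2 weight of 0.125 = 2**-3 is stored as the integer exponent -3, with its sign kept as a separate one-bit XnorPrecisionType, so the product can be implemented as a shift.)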
''' + def __init__(self, width=16, signed=True): super().__init__(width=width, signed=signed) + def find_minimum_width(data, signed=True): """ Helper function to find the minimum integer width to express all entries in the data array without saturation / overflow """ maxdata = np.amax(np.abs(data)) - if maxdata == 0.: + if maxdata == 0.0: # fringe case (amax(abs(data)) == 0 -> data is uniformly zero) return 1 log2max = np.log2(maxdata) iwidth = max(0, int(np.ceil(log2max))) - if iwidth == int(np.floor(log2max)): # is a power-of-two integer -> need one extra bit + if iwidth == int(np.floor(log2max)): # is a power-of-two integer -> need one extra bit iwidth += 1 if signed: @@ -152,28 +164,32 @@ def find_minimum_width(data, signed=True): return iwidth -class NamedType(object): + +class NamedType: def __init__(self, name, precision, **kwargs): self.name = name.format(**kwargs) self.precision = precision + class CompressedType(NamedType): def __init__(self, name, precision, index_precision, **kwargs): if not name.startswith('compressed_'): name = 'compressed_' + name - super(CompressedType, self).__init__(name, precision, **kwargs) + super().__init__(name, precision, **kwargs) self.index_precision = index_precision + class ExponentType(NamedType): def __init__(self, name, precision, **kwargs): if not name.startswith('exponent_'): name = 'exponent_' + name - super(ExponentType, self).__init__(name, precision, **kwargs) + super().__init__(name, precision, **kwargs) self.sign = XnorPrecisionType() + class PackedType(NamedType): def __init__(self, name, precision, n_elem, n_pack, **kwargs): - super(PackedType, self).__init__(name, precision, **kwargs) + super().__init__(name, precision, **kwargs) self.n_elem = n_elem if n_pack < 0: self.n_pack = -n_pack @@ -182,14 +198,16 @@ def __init__(self, name, precision, n_elem, n_pack, **kwargs): self.n_pack = n_pack self.unpack = False -class Variable(object): + +class Variable: def __init__(self, var_name, atype, **kwargs): self.name = var_name.format(**kwargs) self.type = atype + class TensorVariable(Variable): def __init__(self, shape, dim_names, var_name='layer{index}', type_name='layer{index}_t', precision=None, **kwargs): - super(TensorVariable, self).__init__(var_name, NamedType(type_name, precision, **kwargs), **kwargs) + super().__init__(var_name, NamedType(type_name, precision, **kwargs), **kwargs) self.shape = shape self.dim_names = dim_names @@ -203,11 +221,13 @@ def size(self): return nelem def size_cpp(self): - #TODO get rid of size_cpp() (and dim_names) + # TODO get rid of size_cpp() (and dim_names) return '*'.join([str(k) for k in self.dim_names]) + class InplaceTensorVariable(TensorVariable): '''A TensorVariable that is just a link to another''' + def __init__(self, tv, input_var): ''' Always created with a passed in TensorVariable tv @@ -217,9 +237,10 @@ def __init__(self, tv, input_var): self.type = input_var.type self.input_var = input_var + class WeightVariable(Variable): def __init__(self, var_name, type_name, precision, data, quantizer=None, **kwargs): - super(WeightVariable, self).__init__(var_name, NamedType(type_name, precision, **kwargs), **kwargs) + super().__init__(var_name, NamedType(type_name, precision, **kwargs), **kwargs) self.data = data self.nzeros = -1 self.shape = list(self.data.shape) @@ -258,20 +279,21 @@ def update_precision(self, new_precision): width_bits = int(precision_bits[0]) integer_bits = int(precision_bits[1]) fractional_bits = integer_bits - width_bits - lsb = 2 ** fractional_bits + lsb = 2**fractional_bits 
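# For example, precision 'ap_fixed<16,6>' gives width_bits = 16 and integer_bits = 6, so
# fractional_bits = 6 - 16 = -10 and lsb = 2**-10 = 0.0009765625; the branch below then keeps
# len('0009765625') = 10 digits after the decimal point in precision_fmt.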
if lsb < 1: # Use str to represent the float with digits, get the length # to right of decimal point decimal_spaces = len(str(lsb).split('.')[1]) else: decimal_spaces = len(str(2**integer_bits)) - self.precision_fmt = '%.{}f'.format(decimal_spaces) + self.precision_fmt = f'%.{decimal_spaces}f' else: self.precision_fmt = '%f' + class CompressedWeightVariable(WeightVariable): def __init__(self, var_name, type_name, precision, data, reuse_factor, quantizer=None, **kwargs): - super(CompressedWeightVariable, self).__init__(var_name, type_name, precision, data, quantizer=quantizer, **kwargs) + super().__init__(var_name, type_name, precision, data, quantizer=quantizer, **kwargs) self.extra_zeros = 0 self.data_length = np.prod(data.shape) - self.nzeros while self.data_length % reuse_factor != 0: @@ -315,9 +337,10 @@ def __next__(self): next = __next__ + class ExponentWeightVariable(WeightVariable): def __init__(self, var_name, type_name, precision, data, quantizer=None, **kwargs): - super(ExponentWeightVariable, self).__init__(var_name, type_name, precision, data, quantizer, **kwargs) + super().__init__(var_name, type_name, precision, data, quantizer, **kwargs) ''' WeightVariable for Exponent aka po2 data. The data should already by quantized by the quantizer. ''' @@ -330,7 +353,7 @@ def _format(self): sign = np.where(y < 0, np.zeros_like(y), np.ones_like(y)) # Take the logarithm, since this is what we will write to the header # for the optimized product using shifts - y = (np.log2(np.abs(y)) / np.log2(2.)).astype('int') + y = (np.log2(np.abs(y)) / np.log2(2.0)).astype('int') return np.stack((sign, y), axis=-1) def __iter__(self): @@ -345,9 +368,10 @@ def __next__(self): next = __next__ -class Source(object): + +class Source: def __init__(self, code): self.code = code - + def __str__(self): return str(self.code) From 3382967a8a508b37eb7c988a45c42f01dfcc5345 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 27 Jan 2023 09:46:32 -0600 Subject: [PATCH 49/51] pytest pre-commit fixes --- test/pytest/test_qonnx.py | 59 +++++++++++++++++++------------------ test/pytest/test_reshape.py | 44 ++++++++++++++------------- 2 files changed, 54 insertions(+), 49 deletions(-) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index e20a69c967..265901da88 100755 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -1,14 +1,18 @@ #!/usr/bin/env python -import pytest -import hls4ml +import os +import urllib + import numpy as np +import pytest +import qonnx.core.onnx_exec as oxe import qonnx.util.cleanup import qonnx.util.to_channels_last -import urllib -import os + # To conveniently run QONNX inference from qonnx.core.modelwrapper import ModelWrapper -import qonnx.core.onnx_exec as oxe + +import hls4ml + def test_tfc_2w2a(): # download test model @@ -28,7 +32,7 @@ def test_tfc_2w2a(): # Execute QONNX model inference # TODO make the test bigger - ishape = (1,1,28,28) + ishape = (1, 1, 28, 28) np.random.seed(0) X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} @@ -39,16 +43,16 @@ def test_tfc_2w2a(): # Some hand-derived config # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation config['LayerName'] = {} - config['LayerName']['global_in'] = {'Precision' : 'ap_fixed<16,2>'} - hls_model = hls4ml.converters.convert_from_onnx_model(model, - output_dir='hls4mlprj_qonnx_tfc-2w2a', - part='xcu250-figd2104-2L-e', - hls_config=config) + config['LayerName']['global_in'] = 
{'Precision': 'ap_fixed<16,2>'} + hls_model = hls4ml.converters.convert_from_onnx_model( + model, output_dir='hls4mlprj_qonnx_tfc-2w2a', part='xcu250-figd2104-2L-e', hls_config=config + ) hls_model.compile() y_hls4ml = hls_model.predict(X) np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + def test_tfc_2w2a_quartus(): # download test model dl_dir = "./" @@ -67,7 +71,7 @@ def test_tfc_2w2a_quartus(): # Execute QONNX model inference # TODO make the test bigger - ishape = (1,1,28,28) + ishape = (1, 1, 28, 28) np.random.seed(0) X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} @@ -78,17 +82,16 @@ def test_tfc_2w2a_quartus(): # Some hand-derived config # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation config['LayerName'] = {} - config['LayerName']['global_in'] = {'Precision' : 'ac_fixed<16,2>'} - hls_model = hls4ml.converters.convert_from_onnx_model(model, - output_dir='hls4mlprj_qonnx_tfc-2w2a-quartus', - part='Arria10', - backend='Quartus', - hls_config=config) + config['LayerName']['global_in'] = {'Precision': 'ac_fixed<16,2>'} + hls_model = hls4ml.converters.convert_from_onnx_model( + model, output_dir='hls4mlprj_qonnx_tfc-2w2a-quartus', part='Arria10', backend='Quartus', hls_config=config + ) hls_model.compile() y_hls4ml = hls_model.predict(X) np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + def test_cnv_2w2a(): # download test model dl_dir = "./" @@ -111,7 +114,7 @@ def test_cnv_2w2a(): # Execute QONNX model inference # TODO make the test bigger - ishape = (1,32,32,3) + ishape = (1, 32, 32, 3) np.random.seed(1) X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} @@ -123,16 +126,15 @@ def test_cnv_2w2a(): # Some hand-derived config # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation - hls_model = hls4ml.converters.convert_from_onnx_model(model, - output_dir='hls4mlprj_qonnx_cnv-2w2a', - part='xcu250-figd2104-2L-e', - io_type='io_stream', - hls_config=config) + hls_model = hls4ml.converters.convert_from_onnx_model( + model, output_dir='hls4mlprj_qonnx_cnv-2w2a', part='xcu250-figd2104-2L-e', io_type='io_stream', hls_config=config + ) hls_model.compile() y_hls4ml = hls_model.predict(X) np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + @pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) def test_jet_tagging(backend): # download test model @@ -152,7 +154,7 @@ def test_jet_tagging(backend): # Execute QONNX model inference # TODO make the test bigger - ishape = (1,16) + ishape = (1, 16) np.random.seed(0) X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} @@ -163,10 +165,9 @@ def test_jet_tagging(backend): # Some hand-derived config # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation - hls_model = hls4ml.converters.convert_from_onnx_model(model, - output_dir=f'hls4mlprj_qonnx_jettag_{backend}', - backend=backend, - hls_config=config) + hls_model = hls4ml.converters.convert_from_onnx_model( + model, output_dir=f'hls4mlprj_qonnx_jettag_{backend}', backend=backend, hls_config=config + ) hls_model.compile() y_hls4ml = hls_model.predict(X) diff --git a/test/pytest/test_reshape.py b/test/pytest/test_reshape.py index c8f8c2c6d0..8b0421be95 100755 --- 
a/test/pytest/test_reshape.py +++ b/test/pytest/test_reshape.py @@ -2,37 +2,41 @@ """ import pytest -import hls4ml import tensorflow as tf -import numpy as np -from tensorflow.keras import optimizers -from tensorflow.keras.layers import Input, Dense, Reshape, Softmax + +import hls4ml + @pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) def test_reshape_parallel(backend): - model = tf.keras.models.Sequential([ - tf.keras.layers.Input((10)), - tf.keras.layers.Dense(10*3), - tf.keras.layers.Reshape((10,3)), - tf.keras.layers.ReLU() - ]) + model = tf.keras.models.Sequential( + [ + tf.keras.layers.Input(10), + tf.keras.layers.Dense(10 * 3), + tf.keras.layers.Reshape((10, 3)), + tf.keras.layers.ReLU(), + ] + ) model.compile(optimizer='adam', loss='mse') config = hls4ml.utils.config_from_keras_model(model) output_dir = f'hls4mlprj_reshape_parallel_{backend}' - hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, - output_dir=output_dir, - backend=backend) + hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir, backend=backend) hls_model.compile() + def test_reshape_stream(): - model = tf.keras.models.Sequential([ - tf.keras.layers.Input((10)), - tf.keras.layers.Dense(10*3), - tf.keras.layers.Reshape((10,3)), - tf.keras.layers.ReLU() - ]) + model = tf.keras.models.Sequential( + [ + tf.keras.layers.Input(10), + tf.keras.layers.Dense(10 * 3), + tf.keras.layers.Reshape((10, 3)), + tf.keras.layers.ReLU(), + ] + ) model.compile(optimizer='adam', loss='mse') config = hls4ml.utils.config_from_keras_model(model) output_dir = 'hls4mlprj_reshape_stream' - hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir, io_type='io_stream') + hls_model = hls4ml.converters.convert_from_keras_model( + model, hls_config=config, output_dir=output_dir, io_type='io_stream' + ) hls_model.compile() From c78c99e25d215685cff43bac0a2f61b7aa6d9d24 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 27 Jan 2023 09:49:15 -0600 Subject: [PATCH 50/51] pytest pre-commit fixes: trailing whitespace --- test/pytest/ci-template.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/pytest/ci-template.yml b/test/pytest/ci-template.yml index 45066e7b80..88002cf81f 100644 --- a/test/pytest/ci-template.yml +++ b/test/pytest/ci-template.yml @@ -1,7 +1,7 @@ .pytest: stage: test image: gitlab-registry.cern.ch/fastmachinelearning/hls4ml-testing:0.3b0.base - tags: + tags: - docker before_script: - source ~/.bashrc @@ -14,7 +14,7 @@ artifacts: when: always reports: - junit: + junit: - test/pytest/report.xml coverage_report: coverage_format: cobertura From 388a1d41ad89ce6d96f6e92a7069d3267f8d79d5 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 27 Jan 2023 19:23:55 -0600 Subject: [PATCH 51/51] mark batch dimension in input --- hls4ml/converters/onnx_to_hls.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hls4ml/converters/onnx_to_hls.py b/hls4ml/converters/onnx_to_hls.py index a7d7c3e32a..4e2c804e07 100644 --- a/hls4ml/converters/onnx_to_hls.py +++ b/hls4ml/converters/onnx_to_hls.py @@ -270,7 +270,10 @@ def onnx_to_hls(config): input_layer = {} input_layer['name'] = replace_char_inconsitency(inp) input_layer['class_name'] = 'InputLayer' - input_layer['input_shape'] = get_global_input_shape(model.graph, inp) + inp_shape = get_global_input_shape(model.graph, inp) + # We only support ONNX where the first dimension is the batch dimension + # Mark it with None 
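# e.g. an ONNX graph input of shape (1, 1, 28, 28) becomes (None, 1, 28, 28) here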
+ input_layer['input_shape'] = (None,) + inp_shape[1:] print('Input shape:', input_layer['input_shape']) # Clean the layer name for specific models