diff --git a/hls4ml/backends/fpga/fpga_types.py b/hls4ml/backends/fpga/fpga_types.py index 6b1e63a469..ceac0b5e4d 100644 --- a/hls4ml/backends/fpga/fpga_types.py +++ b/hls4ml/backends/fpga/fpga_types.py @@ -1,18 +1,30 @@ import numpy as np -from hls4ml.model.types import CompressedType, NamedType, ExponentType, FixedPrecisionType, IntegerPrecisionType, XnorPrecisionType, ExponentPrecisionType, TensorVariable, PackedType, WeightVariable +from hls4ml.model.types import ( + CompressedType, + ExponentPrecisionType, + ExponentType, + FixedPrecisionType, + IntegerPrecisionType, + NamedType, + PackedType, + XnorPrecisionType, +) -#region Precision types +# region Precision types -class PrecisionDefinition(object): + +class PrecisionDefinition: def definition_cpp(self): raise NotImplementedError + class APIntegerPrecisionDefinition(PrecisionDefinition): def definition_cpp(self): typestring = 'ap_{signed}int<{width}>'.format(signed='u' if not self.signed else '', width=self.width) return typestring + class APFixedPrecisionDefinition(PrecisionDefinition): def _rounding_mode_cpp(self, mode): if mode is not None: @@ -23,16 +35,24 @@ def _saturation_mode_cpp(self, mode): return 'AP_' + str(mode) def definition_cpp(self): - args = [self.width, self.integer, self._rounding_mode_cpp(self.rounding_mode), self._saturation_mode_cpp(self.saturation_mode), self.saturation_bits] + args = [ + self.width, + self.integer, + self._rounding_mode_cpp(self.rounding_mode), + self._saturation_mode_cpp(self.saturation_mode), + self.saturation_bits, + ] args = ','.join([str(arg) for arg in args if arg is not None]) typestring = 'ap_{signed}fixed<{args}>'.format(signed='u' if not self.signed else '', args=args) return typestring + class ACIntegerPrecisionDefinition(PrecisionDefinition): def definition_cpp(self): - typestring = 'ac_int<{width}, {signed}>'.format(width=self.width, signed=str(self.signed).lower()) + typestring = f'ac_int<{self.width}, {str(self.signed).lower()}>' return typestring + class ACFixedPrecisionDefinition(PrecisionDefinition): def _rounding_mode_cpp(self, mode): if mode is not None: @@ -43,15 +63,24 @@ def _saturation_mode_cpp(self, mode): return 'AC_' + str(mode) def definition_cpp(self): - args = [self.width, self.integer, str(self.signed).lower(), self._rounding_mode_cpp(self.rounding_mode), self._saturation_mode_cpp(self.saturation_mode), self.saturation_bits] + args = [ + self.width, + self.integer, + str(self.signed).lower(), + self._rounding_mode_cpp(self.rounding_mode), + self._saturation_mode_cpp(self.saturation_mode), + self.saturation_bits, + ] args = ','.join([str(arg) for arg in args if arg is not None]) - typestring = 'ac_fixed<{args}>'.format(args=args) + typestring = f'ac_fixed<{args}>' return typestring -class PrecisionConverter(object): + +class PrecisionConverter: def convert(self, precision_type): raise NotImplementedError + class FixedPrecisionConverter(PrecisionConverter): def __init__(self, type_map, prefix): self.type_map = type_map @@ -71,7 +100,8 @@ def convert(self, precision_type): precision_type.__class__ = type(self.prefix + type_cls_name, (type_cls, definition_cls), {}) return precision_type else: - raise Exception('Cannot convert precision type to {}: {}'.format(self.prefix, precision_type.__class__.__name__)) + raise Exception(f'Cannot convert precision type to {self.prefix}: {precision_type.__class__.__name__}') + class APTypeConverter(FixedPrecisionConverter): def __init__(self): @@ -82,9 +112,10 @@ def __init__(self): ExponentPrecisionType: 
APIntegerPrecisionDefinition, XnorPrecisionType: APIntegerPrecisionDefinition, }, - prefix='AP' + prefix='AP', ) + class ACTypeConverter(FixedPrecisionConverter): def __init__(self): super().__init__( @@ -94,58 +125,61 @@ def __init__(self): ExponentPrecisionType: ACIntegerPrecisionDefinition, XnorPrecisionType: ACIntegerPrecisionDefinition, }, - prefix='AC' + prefix='AC', ) -#endregion -#region Data types +# endregion + +# region Data types + -class TypeDefinition(object): +class TypeDefinition: def definition_cpp(self): raise NotImplementedError -class TypePrecisionConverter(object): + +class TypePrecisionConverter: def convert_precision(self, precision_converter): self.precision = precision_converter.convert(self.precision) + class NamedTypeConverter(TypeDefinition, TypePrecisionConverter): def definition_cpp(self): - return 'typedef {precision} {name};\n'.format(name=self.name, precision=self.precision.definition_cpp()) + return f'typedef {self.precision.definition_cpp()} {self.name};\n' + class CompressedTypeConverter(TypeDefinition, TypePrecisionConverter): def definition_cpp(self): - cpp_fmt = ( - 'typedef struct {name} {{' - '{index} row_index;' - '{index} col_index;' - '{precision} weight; }} {name};\n' - ) + cpp_fmt = 'typedef struct {name} {{' '{index} row_index;' '{index} col_index;' '{precision} weight; }} {name};\n' return cpp_fmt.format(name=self.name, index=self.index_precision, precision=self.precision.definition_cpp()) def convert_precision(self, precision_converter): super().convert_precision(precision_converter) self.index_precision = precision_converter.convert(self.index_precision) + class ExponentTypeConverter(TypeDefinition, TypePrecisionConverter): def definition_cpp(self): - cpp_fmt = ( - 'typedef struct {name} {{' - '{sign} sign;' - '{precision} weight; }} {name};\n' - ) + cpp_fmt = 'typedef struct {name} {{' '{sign} sign;' '{precision} weight; }} {name};\n' return cpp_fmt.format(name=self.name, precision=self.precision.definition_cpp(), sign=self.sign.definition_cpp()) def convert_precision(self, precision_converter): super().convert_precision(precision_converter) self.sign = precision_converter.convert(self.sign) + class PackedTypeConverter(TypeDefinition, TypePrecisionConverter): def definition_cpp(self): n_elem_expr = '/' if self.unpack else '*' - return 'typedef nnet::array<{precision}, {n_elem}> {name};\n'.format(name=self.name, precision=self.precision.definition_cpp(), n_elem=str(self.n_elem) + n_elem_expr + str(self.n_pack)) + return 'typedef nnet::array<{precision}, {n_elem}> {name};\n'.format( + name=self.name, + precision=self.precision.definition_cpp(), + n_elem=str(self.n_elem) + n_elem_expr + str(self.n_pack), + ) -class HLSTypeConverter(object): + +class HLSTypeConverter: def __init__(self, precision_converter): self.precision_converter = precision_converter self.type_map = { @@ -170,34 +204,54 @@ def convert(self, atype): atype.convert_precision(self.precision_converter) return atype else: - raise Exception('Cannot convert type: {}'.format(atype.__class__.__name__)) + raise Exception(f'Cannot convert type: {atype.__class__.__name__}') + -#endregion +# endregion -#region Variables +# region Variables -class VariableDefinition(object): + +class VariableDefinition: def definition_cpp(self, name_suffix='', as_reference=False): raise NotImplementedError -#region ArrayVariable + +# region ArrayVariable + class VivadoArrayVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - return '{type} 
{name}{suffix}[{shape}]'.format(type=self.type.name, name=self.name, suffix=name_suffix, shape=self.size_cpp()) + return '{type} {name}{suffix}[{shape}]'.format( + type=self.type.name, name=self.name, suffix=name_suffix, shape=self.size_cpp() + ) + class QuartusArrayVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - return '{type} {name}{suffix}[{shape}] {pragma}'.format(type=self.type.name, name=self.name, suffix=name_suffix, shape=self.size_cpp(), pragma=self.pragma) + return '{type} {name}{suffix}[{shape}] {pragma}'.format( + type=self.type.name, name=self.name, suffix=name_suffix, shape=self.size_cpp(), pragma=self.pragma + ) + + +class VivadoInplaceArrayVariableDefinition(VariableDefinition): + def definition_cpp(self): + return f'auto& {self.name} = {self.input_var.name}' + -class ArrayVariableConverter(object): +class QuartusInplaceArrayVariableDefinition(VariableDefinition): + def definition_cpp(self): + return f'auto& {self.name} = {self.input_var.name}' + + +class ArrayVariableConverter: def __init__(self, type_converter, prefix, definition_cls): self.type_converter = type_converter self.prefix = prefix self.definition_cls = definition_cls def convert(self, tensor_var, pragma='partition'): - if isinstance(tensor_var, self.definition_cls): # Already converted + if isinstance(tensor_var, self.definition_cls): # Already converted return tensor_var tensor_var.pragma = pragma @@ -206,30 +260,49 @@ def convert(self, tensor_var, pragma='partition'): tensor_var.__class__ = type(self.prefix + 'ArrayVariable', (type(tensor_var), self.definition_cls), {}) return tensor_var + class VivadoArrayVariableConverter(ArrayVariableConverter): def __init__(self, type_converter): super().__init__(type_converter=type_converter, prefix='Vivado', definition_cls=VivadoArrayVariableDefinition) + class QuartusArrayVariableConverter(ArrayVariableConverter): def __init__(self, type_converter): super().__init__(type_converter=type_converter, prefix='Quartus', definition_cls=QuartusArrayVariableDefinition) -#endregion -#region StructMemberVariable +class VivadoInplaceArrayVariableConverter(ArrayVariableConverter): + def __init__(self, type_converter): + super().__init__(type_converter=type_converter, prefix='Vivado', definition_cls=VivadoInplaceArrayVariableDefinition) + + +class QuartusInplaceArrayVariableConverter(ArrayVariableConverter): + def __init__(self, type_converter): + super().__init__( + type_converter=type_converter, prefix='Quartus', definition_cls=QuartusInplaceArrayVariableDefinition + ) + + +# endregion + +# region StructMemberVariable + class QuartusStructMemberVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - return '{type} {name}{suffix}[{shape}]'.format(type=self.type.name, name=self.member_name, suffix=name_suffix, shape=self.size_cpp()) + return '{type} {name}{suffix}[{shape}]'.format( + type=self.type.name, name=self.member_name, suffix=name_suffix, shape=self.size_cpp() + ) -class StructMemberVariableConverter(object): + +class StructMemberVariableConverter: def __init__(self, type_converter, prefix, definition_cls): self.type_converter = type_converter self.prefix = prefix self.definition_cls = definition_cls def convert(self, tensor_var, pragma='partition', struct_name=None): - if isinstance(tensor_var, self.definition_cls): # Already converted + if isinstance(tensor_var, self.definition_cls): # Already converted return tensor_var tensor_var.pragma = pragma @@ -243,97 +316,127 @@ 
def convert(self, tensor_var, pragma='partition', struct_name=None): tensor_var.__class__ = type(self.prefix + 'StructMemberVariable', (type(tensor_var), self.definition_cls), {}) return tensor_var + class QuartusStructMemberVariableConverter(StructMemberVariableConverter): def __init__(self, type_converter): - super().__init__(type_converter=type_converter, prefix='Quartus', definition_cls=QuartusStructMemberVariableDefinition) + super().__init__( + type_converter=type_converter, prefix='Quartus', definition_cls=QuartusStructMemberVariableDefinition + ) + -#endregion +# endregion + +# region StreamVariable -#region StreamVariable class VivadoStreamVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - if as_reference: # Function parameter - return 'hls::stream<{type}> &{name}{suffix}'.format(type=self.type.name, name=self.name, suffix=name_suffix) - else: # Declaration - return 'hls::stream<{type}> {name}{suffix}("{name}")'.format(type=self.type.name, name=self.name, suffix=name_suffix) + if as_reference: # Function parameter + return f'hls::stream<{self.type.name}> &{self.name}{name_suffix}' + else: # Declaration + return 'hls::stream<{type}> {name}{suffix}("{name}")'.format( + type=self.type.name, name=self.name, suffix=name_suffix + ) + + +class VivadoInplaceStreamVariableDefinition(VariableDefinition): + def definition_cpp(self): + return f'auto& {self.name} = {self.input_var.name}' + class QuartusStreamVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - if as_reference: # Function parameter - return 'stream<{type}> &{name}{suffix}'.format(type=self.type.name, name=self.name, suffix=name_suffix) - else: # Declaration - return 'stream<{type}> {name}{suffix}'.format(type=self.type.name, name=self.name, suffix=name_suffix) + if as_reference: # Function parameter + return f'stream<{self.type.name}> &{self.name}{name_suffix}' + else: # Declaration + return f'stream<{self.type.name}> {self.name}{name_suffix}' -class StreamVariableConverter(object): + +class QuartusInplaceStreamVariableDefinition(VariableDefinition): + def definition_cpp(self): + return f'auto& {self.name} = {self.input_var.name}' + + +class StreamVariableConverter: def __init__(self, type_converter, prefix, definition_cls): self.type_converter = type_converter self.prefix = prefix self.definition_cls = definition_cls def convert(self, tensor_var, n_pack=1, depth=0): - if isinstance(tensor_var, self.definition_cls): # Already converted + if isinstance(tensor_var, self.definition_cls): # Already converted return tensor_var if depth == 0: depth = np.prod(tensor_var.shape) // tensor_var.shape[-1] tensor_var.pragma = ('stream', depth) - tensor_var.type = self.type_converter.convert(PackedType(tensor_var.type.name, tensor_var.type.precision, tensor_var.shape[-1], n_pack)) + tensor_var.type = self.type_converter.convert( + PackedType(tensor_var.type.name, tensor_var.type.precision, tensor_var.shape[-1], n_pack) + ) tensor_var.__class__ = type(self.prefix + 'StreamVariable', (type(tensor_var), self.definition_cls), {}) return tensor_var + class VivadoStreamVariableConverter(StreamVariableConverter): def __init__(self, type_converter): super().__init__(type_converter=type_converter, prefix='Vivado', definition_cls=VivadoStreamVariableDefinition) + class QuartusStreamVariableConverter(StreamVariableConverter): def __init__(self, type_converter): super().__init__(type_converter=type_converter, prefix='Quartus', 
definition_cls=QuartusStreamVariableDefinition) -#endregion -#region InplaceVariable +# endregion + +# region InplaceStreamVariable -class InplaceVariableConverter(object): - def __init__(self, type_converter, prefix): - self.type_converter = type_converter - self.prefix = prefix - def convert(self, tensor_var, io_type): - if tensor_var.__class__.__name__.startswith(self.prefix): # Already converted +class InplaceStreamVariableConverter(StreamVariableConverter): + def convert(self, tensor_var, n_pack=1, depth=0): + if isinstance(tensor_var, self.definition_cls): # Already converted return tensor_var - if io_type == 'io_stream': - tensor_var.type = self.type_converter.convert(PackedType(tensor_var.type.name, tensor_var.type.precision, tensor_var.shape[-1], n_pack=1)) - else: - tensor_var.type = self.type_converter.convert(tensor_var.type) + tensor_var.pragma = None + tensor_var.type = self.type_converter.convert( + PackedType(tensor_var.type.name, tensor_var.type.precision, tensor_var.input_var.shape[-1], n_pack) + ) - tensor_var.__class__ = type(self.prefix + 'InplaceVariable', (type(tensor_var),), {}) + tensor_var.__class__ = type(self.prefix + 'StreamVariable', (type(tensor_var), self.definition_cls), {}) return tensor_var -class VivadoInplaceVariableConverter(InplaceVariableConverter): + +class VivadoInplaceStreamVariableConverter(InplaceStreamVariableConverter): def __init__(self, type_converter): - super().__init__(type_converter=type_converter, prefix='Vivado') + super().__init__( + type_converter=type_converter, prefix='Vivado', definition_cls=VivadoInplaceStreamVariableDefinition + ) + -class QuartusInplaceVariableConverter(InplaceVariableConverter): +class QuartusInplaceStreamVariableConverter(InplaceStreamVariableConverter): def __init__(self, type_converter): - super().__init__(type_converter=type_converter, prefix='Quartus') + super().__init__( + type_converter=type_converter, prefix='Quartus', definition_cls=QuartusInplaceStreamVariableDefinition + ) -#endregion -#region WeightsVariable +# endregion + +# region WeightsVariable + class StaticWeightVariableDefinition(VariableDefinition): def definition_cpp(self, name_suffix='', as_reference=False): - return '{type} {name}[{size}]'.format(type=self.type.name, name=self.name, size=self.data_length) + return f'{self.type.name} {self.name}[{self.data_length}]' + -class StaticWeightVariableConverter(object): +class StaticWeightVariableConverter: def __init__(self, type_converter): self.type_converter = type_converter def convert(self, weight_var): - if isinstance(weight_var, StaticWeightVariableDefinition): # Already converted + if isinstance(weight_var, StaticWeightVariableDefinition): # Already converted return weight_var weight_var.weight_class = weight_var.__class__.__name__ @@ -343,12 +446,14 @@ def convert(self, weight_var): weight_var.__class__ = type('StaticWeightVariable', (type(weight_var), StaticWeightVariableDefinition), {}) return weight_var -class BramWeightVariableConverter(object): + +class BramWeightVariableConverter: @classmethod def convert(cls, weight_var): weight_var.storage = 'bram' return weight_var -#endregion -#endregion \ No newline at end of file +# endregion + +# endregion diff --git a/hls4ml/backends/fpga/passes/inplace_parallel_reshape.py b/hls4ml/backends/fpga/passes/inplace_parallel_reshape.py new file mode 100644 index 0000000000..532becc9db --- /dev/null +++ b/hls4ml/backends/fpga/passes/inplace_parallel_reshape.py @@ -0,0 +1,24 @@ +from hls4ml.model.layers import Reshape +from hls4ml.model.optimizer 
import OptimizerPass +from hls4ml.model.types import InplaceTensorVariable + + +class InplaceParallelReshape(OptimizerPass): + """ + Replaces the output variable of Reshape layer with an inplace variable when using io_parallel. + + This is done because in io_parallel tensors are stored as flat arrays, requiring no reshaping. + """ + + def match(self, node): + return isinstance(node, Reshape) + + def transform(self, model, node): + if model.config.get_config_value('IOType') != 'io_parallel': + return False + + outvar = node.get_output_variable() + invar = node.get_input_variable() + newoutvar = InplaceTensorVariable(outvar, invar) + node.set_attr(node.outputs[0], newoutvar) + return False diff --git a/hls4ml/backends/fpga/passes/inplace_stream_flatten.py b/hls4ml/backends/fpga/passes/inplace_stream_flatten.py new file mode 100644 index 0000000000..a16ffefc4a --- /dev/null +++ b/hls4ml/backends/fpga/passes/inplace_stream_flatten.py @@ -0,0 +1,25 @@ +from hls4ml.model.layers import Reshape +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import InplaceTensorVariable + + +class InplaceStreamFlatten(OptimizerPass): + """ + Replaces the output variable of Reshape (flatten) layer with an inplace variable when using io_stream. + + This optimizer avoids the expensive repacking of the stream when Reshape layer flattens the tensor to 1d. + """ + + def match(self, node): + # Reshape acts as a Flatten layer when the result has 1 dimension + return isinstance(node, Reshape) and len(node.get_output_variable().shape) == 1 + + def transform(self, model, node): + if model.config.get_config_value('IOType') != 'io_stream': + return False + + outvar = node.get_output_variable() + invar = node.get_input_variable() + newoutvar = InplaceTensorVariable(outvar, invar) + node.set_attr(node.outputs[0], newoutvar) + return False diff --git a/hls4ml/backends/fpga/passes/repack_stream.py b/hls4ml/backends/fpga/passes/repack_stream.py new file mode 100644 index 0000000000..a502f0ab20 --- /dev/null +++ b/hls4ml/backends/fpga/passes/repack_stream.py @@ -0,0 +1,64 @@ +import numpy as np + +from hls4ml.backends.template import FunctionCallTemplate +from hls4ml.model.layers import Layer, Reshape, register_layer +from hls4ml.model.optimizer import OptimizerPass + + +class Repack(Layer): + '''Inserted between layers with different packing factors.''' + + def initialize(self): + shape = self.attributes['target_shape'] + if shape[0] is None: + shape = shape[1:] + dims = [f'N_SIZE_{i}_{self.index}' for i in range(1, len(shape) + 1)] + + self.add_output_variable(shape, dims) + + +repack_function_template = 'nnet::repack_stream<{input_t}, {output_t}, {size}>({input}, {output});' +repack_include_list = ['nnet_utils/nnet_stream.h'] + + +class RepackFunctionTemplate(FunctionCallTemplate): + def __init__(self): + super().__init__(Repack, include_header=repack_include_list) + self.template = repack_function_template + + def format(self, node): + params = self._default_function_params(node) + params['size'] = np.prod(node.get_output_variable().shape) + + return self.template.format(**params) + + +def register_repack_stream(backend): + # Register the layer types to the layer map + register_layer('Repack', Repack) + + # Register the optimization passes + backend.register_pass('reshape_stream', ReshapeStream) + + # Register template passes + backend.register_template(RepackFunctionTemplate) + + +class ReshapeStream(OptimizerPass): + '''Repacks stream for Reshape layer''' + + def match(self, node): + # do not run 
optimizer pass for a flatten layer (1 output dimension) + return isinstance(node, Reshape) and len(node.get_output_variable().shape) > 1 + + def transform(self, model, node): + if model.config.get_config_value('IOType') != 'io_stream': + return False + + attrs = {'target_shape': node.get_attr('target_shape')} + + # Insert new Repack node instead of Reshape + repack_layer = model.make_node(Repack, 'repack_' + node.name, attrs, node.inputs.copy()) + model.replace_node(node, repack_layer) + + return True diff --git a/hls4ml/backends/quartus/passes/transform_types.py b/hls4ml/backends/quartus/passes/transform_types.py index 75091a8226..67de32ab65 100644 --- a/hls4ml/backends/quartus/passes/transform_types.py +++ b/hls4ml/backends/quartus/passes/transform_types.py @@ -1,34 +1,47 @@ - +from hls4ml.backends.fpga.fpga_types import ( + ACTypeConverter, + HLSTypeConverter, + QuartusArrayVariableConverter, + QuartusInplaceArrayVariableConverter, + QuartusInplaceStreamVariableConverter, + QuartusStreamVariableConverter, + QuartusStructMemberVariableConverter, + StaticWeightVariableConverter, +) from hls4ml.model.optimizer import GlobalOptimizerPass -from hls4ml.model.types import InplaceVariable -from hls4ml.backends.fpga.fpga_types import ACTypeConverter, QuartusArrayVariableConverter, HLSTypeConverter, QuartusInplaceVariableConverter, QuartusStreamVariableConverter, QuartusStructMemberVariableConverter, StaticWeightVariableConverter +from hls4ml.model.types import InplaceTensorVariable + class TransformTypes(GlobalOptimizerPass): def __init__(self): self.type_converter = HLSTypeConverter(precision_converter=ACTypeConverter()) self.array_var_converter = QuartusArrayVariableConverter(type_converter=self.type_converter) + self.inplace_array_var_converter = QuartusInplaceArrayVariableConverter(type_converter=self.type_converter) self.struct_var_converter = QuartusStructMemberVariableConverter(type_converter=self.type_converter) self.stream_var_converter = QuartusStreamVariableConverter(type_converter=self.type_converter) + self.inplace_stream_var_converter = QuartusInplaceStreamVariableConverter(type_converter=self.type_converter) self.weight_var_converter = StaticWeightVariableConverter(type_converter=self.type_converter) - self.inplace_var_converter = QuartusInplaceVariableConverter(type_converter=self.type_converter) def transform(self, model, node): io_type = node.model.config.get_config_value('IOType') for out_name, var in node.variables.items(): - if isinstance(var, InplaceVariable): - new_var = self.inplace_var_converter.convert(var, io_type) if io_type == 'io_stream': - new_var = self.stream_var_converter.convert(var) + if isinstance(var, InplaceTensorVariable): + new_var = self.inplace_stream_var_converter.convert(var) + else: + new_var = self.stream_var_converter.convert(var) elif io_type == 'io_parallel': - if node.name in node.model.inputs: + if out_name in node.model.inputs: new_var = self.struct_var_converter.convert(var, pragma='hls_register', struct_name='inputs') - elif node.name in node.model.outputs: + elif out_name in node.model.outputs: new_var = self.struct_var_converter.convert(var, pragma='hls_register', struct_name='outputs') + elif isinstance(var, InplaceTensorVariable): + new_var = self.inplace_array_var_converter.convert(var, pragma='') else: new_var = self.array_var_converter.convert(var, pragma='hls_register') else: - raise Exception('Unknown IOType {} in {} ({})'.format(io_type, node.name, node.class_name)) + raise Exception(f'Unknown IOType {io_type} in {node.name} 
({node.class_name})') node.set_attr(out_name, new_var) diff --git a/hls4ml/backends/quartus/quartus_backend.py b/hls4ml/backends/quartus/quartus_backend.py index ef27543d25..253ce63dc7 100644 --- a/hls4ml/backends/quartus/quartus_backend.py +++ b/hls4ml/backends/quartus/quartus_backend.py @@ -45,7 +45,7 @@ def _register_flows(self): initializers = self._get_layer_initializers() init_flow = register_flow('init_layers', initializers, requires=['optimize'], backend=self.name) - streaming_passes = ['quartus:clone_output'] + streaming_passes = ['quartus:reshape_stream', 'quartus:clone_output'] streaming_flow = register_flow('streaming', streaming_passes, requires=[init_flow], backend=self.name) quartus_types = [ @@ -62,7 +62,13 @@ def _register_flows(self): ] quantization_flow = register_flow('quantization', quantization_passes, requires=[init_flow], backend=self.name) - optimization_passes = ['quartus:remove_final_reshape', 'quartus:optimize_pointwise_conv', 'quartus:skip_softmax'] + optimization_passes = [ + 'quartus:remove_final_reshape', + 'quartus:optimize_pointwise_conv', + 'quartus:inplace_parallel_reshape', + 'quartus:inplace_stream_flatten', + 'quartus:skip_softmax', + ] optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name) templates = self._get_layer_templates() diff --git a/hls4ml/backends/template.py b/hls4ml/backends/template.py index c48b94160a..4ce6c5c8e6 100644 --- a/hls4ml/backends/template.py +++ b/hls4ml/backends/template.py @@ -1,4 +1,3 @@ - from hls4ml.model.optimizer.optimizer import OptimizerPass @@ -9,7 +8,7 @@ def __init__(self, name, layer_class, attribute_name): if not isinstance(self.layer_class, (list, tuple, set)): self.layer_class = [self.layer_class] self.attribute_name = attribute_name - + def match(self, node): for layer_cls in self.layer_class: if node.class_name == layer_cls.__name__: @@ -20,13 +19,14 @@ def transform(self, model, node): formatted_template = self.format(node) node.set_attr(self.attribute_name, formatted_template) return False - + def format(self, node): raise NotImplementedError def get_name(self): return self.name - + + class LayerConfigTemplate(Template): def __init__(self, layer_class): if isinstance(layer_class, (list, tuple, set)): @@ -35,7 +35,7 @@ def __init__(self, layer_class): name = layer_class.__name__.lower() name += '_config_template' super().__init__(name, layer_class, 'config_cpp') - + def _default_config_params(self, layer): params = {} params.update(layer.attributes) @@ -44,6 +44,7 @@ def _default_config_params(self, layer): return params + class FunctionCallTemplate(Template): def __init__(self, layer_class, include_header=None): if isinstance(layer_class, (list, tuple, set)): @@ -52,12 +53,15 @@ def __init__(self, layer_class, include_header=None): name = layer_class.__name__.lower() name += '_function_template' super().__init__(name, layer_class, 'function_cpp') - self.include_header = include_header - + if include_header is None: + self.include_header = () + else: + self.include_header = include_header + def _default_function_params(self, layer): params = {} params.update(layer.attributes) - params['config'] = 'config{}'.format(layer.index) + params['config'] = f'config{layer.index}' params['input_t'] = layer.get_input_variable().type.name params['output_t'] = layer.get_output_variable().type.name params['input'] = layer.get_input_variable().name diff --git a/hls4ml/backends/vivado/passes/repack_stream.py b/hls4ml/backends/vivado/passes/broadcast_stream.py similarity 
index 60% rename from hls4ml/backends/vivado/passes/repack_stream.py rename to hls4ml/backends/vivado/passes/broadcast_stream.py index ef09ebda83..ec6322cf78 100644 --- a/hls4ml/backends/vivado/passes/repack_stream.py +++ b/hls4ml/backends/vivado/passes/broadcast_stream.py @@ -1,44 +1,21 @@ import numpy as np -from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.layers import Layer, Merge, Reshape, Concatenate, register_layer from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate +from hls4ml.model.layers import Concatenate, Layer, Merge, register_layer +from hls4ml.model.optimizer import OptimizerPass -class Repack(Layer): - ''' Inserted between layers with different packing factors.''' - - def initialize(self): - shape = self.attributes['target_shape'] - if shape[0] is None: - shape = shape[1:] - dims = ['N_SIZE_{}_{}'.format(i, self.index) for i in range(1, len(shape) + 1)] - - self.add_output_variable(shape, dims) - -repack_function_template = 'nnet::repack_stream<{input_t}, {output_t}, {size}>({input}, {output});' -repack_include_list = ['nnet_utils/nnet_stream.h'] - -class RepackFunctionTemplate(FunctionCallTemplate): - def __init__(self): - super().__init__(Repack, include_header=repack_include_list) - self.template = repack_function_template - - def format(self, node): - params = self._default_function_params(node) - params['size'] = np.prod(node.get_output_variable().shape) - - return self.template.format(**params) class Broadcast(Layer): - ''' Inserted between layers for broadcasting.''' + '''Inserted between layers for broadcasting.''' def initialize(self): shape = self.attributes['target_shape'] if shape[0] is None: shape = shape[1:] - dims = ['N_SIZE_{}_{}'.format(i, self.index) for i in range(1, len(shape) + 1)] + dims = [f'N_SIZE_{i}_{self.index}' for i in range(1, len(shape) + 1)] self.add_output_variable(shape, dims) + broadcast_function_template = 'nnet::broadcast_stream<{input_t}, {output_t}, {config}>({input}, {output});' broadcast_config_template = """struct config{index} : nnet::broadcast_config {{ static const unsigned in_width = {in_width}; @@ -50,11 +27,12 @@ def initialize(self): }};\n""" broadcast_include_list = ['nnet_utils/nnet_stream.h'] + class BroadcastConfigTemplate(LayerConfigTemplate): def __init__(self): super().__init__(Broadcast) self.template = broadcast_config_template - + def format(self, node): params = self._default_config_params(node) params['in_height'] = node.get_input_variable().shape[0] @@ -66,48 +44,28 @@ def format(self, node): return self.template.format(**params) + class BroadcastFunctionTemplate(FunctionCallTemplate): def __init__(self): super().__init__(Broadcast, include_header=broadcast_include_list) self.template = broadcast_function_template - + def format(self, node): params = self._default_function_params(node) return self.template.format(**params) -def register_repack_stream(backend): + +def register_broadcast_stream(backend): # Register the layer types to the layer map - register_layer('Repack', Repack) register_layer('Broadcast', Broadcast) - + # Register the optimization passes - backend.register_pass('reshape_stream', ReshapeStream) backend.register_pass('broadcast_stream', BroadcastStream) - + # Register template passes - backend.register_template(RepackFunctionTemplate) backend.register_template(BroadcastConfigTemplate) backend.register_template(BroadcastFunctionTemplate) -class ReshapeStream(OptimizerPass): - ''' Repacks stream for Reshape layer ''' - def match(self, node): - # do not 
run optimizer pass for a flatten layer (1 output dimension) - return isinstance(node, Reshape) and len(node.get_output_variable().shape) > 1 - - def transform(self, model, node): - if model.config.get_config_value('IOType') != 'io_stream': - return False - - attrs = { - 'target_shape': node.get_attr('target_shape') - } - - # Insert new Repack node instead of Reshape - repack_layer = model.make_node(Repack, 'repack_' + node.name, attrs, node.inputs.copy()) - model.replace_node(node, repack_layer) - - return True class BroadcastStream(OptimizerPass): def match(self, node): @@ -117,24 +75,19 @@ def match(self, node): return inp1.shape != inp2.shape else: return False - + def transform(self, model, node): - if model.config.backend.name not in ['Vivado'] or \ - model.config.get_config_value('IOType') != 'io_stream': + if model.config.backend.name not in ['Vivado'] or model.config.get_config_value('IOType') != 'io_stream': return False inp = [node.get_input_variable(inp_name) for inp_name in node.inputs] if np.prod(inp[0].shape) > np.prod(inp[1].shape): idx = 1 - attrs = { - 'target_shape': inp[0].shape - } + attrs = {'target_shape': inp[0].shape} else: idx = 0 - attrs = { - 'target_shape': inp[1].shape - } + attrs = {'target_shape': inp[1].shape} def supported_broadcast(inp_shape, target_shape): # Must be (H, W, C) @@ -152,8 +105,10 @@ def supported_broadcast(inp_shape, target_shape): inp_shape = node.get_input_variable(brdcst_inp).shape target_shape = attrs['target_shape'] if not supported_broadcast(inp_shape, target_shape): - raise RuntimeError('Unsupported broadcast type for stream: {} -> {};'.format(inp_shape, target_shape) + \ - 'Only (1, 1, C) -> (H, W, C) and (H, W, 1) -> (H, W, C) currently supported') + raise RuntimeError( + f'Unsupported broadcast type for stream: {inp_shape} -> {target_shape};' + + 'Only (1, 1, C) -> (H, W, C) and (H, W, 1) -> (H, W, C) currently supported' + ) brdcst_out = 'broadcast_' + brdcst_inp brdcst_layer = model.make_node('Broadcast', brdcst_out, attrs, [brdcst_inp].copy()) model.insert_node(brdcst_layer, before=node, input_idx=idx) diff --git a/hls4ml/backends/vivado/passes/fifo_depth_optimization.py b/hls4ml/backends/vivado/passes/fifo_depth_optimization.py index a22a7f914a..4d92e98de1 100644 --- a/hls4ml/backends/vivado/passes/fifo_depth_optimization.py +++ b/hls4ml/backends/vivado/passes/fifo_depth_optimization.py @@ -6,8 +6,10 @@ def populate_values(values, name, data, depth): + def get_values(x): + return int(x[1][1:], 2) + values.append({'name': name, 'data': [], 'max': 0, 'depth': 0}) - get_values = lambda x: int(x[1][1:], 2) values[-1]['data'] = [get_values(x) for x in data] values[-1]['max'] = max(values[-1]['data']) values[-1]['depth'] = int(depth[0][1][1:], 2) @@ -15,17 +17,22 @@ def populate_values(values, name, data, depth): def set_big_fifos(vars_to_profile, profiling_fifo_depth): - for k, v in vars_to_profile.items(): - v.pragma = (v.pragma[0], profiling_fifo_depth) + for v in vars_to_profile.values(): + if v.pragma: + v.pragma = (v.pragma[0], profiling_fifo_depth) def get_vcd_data(model): model.write() - model.build(reset=False, csim=True, synth=True, cosim=True, validation=False, export=False, vsynth=False, - fifo_opt=True) + model.build(reset=False, csim=True, synth=True, cosim=True, validation=False, export=False, vsynth=False, fifo_opt=True) with open( - model.config.get_output_dir() + '/' + model.config.get_project_name() + '_prj' + '/solution1/sim/verilog/fifo_opt.vcd') as vcd_file: + model.config.get_output_dir() + + '/' + + 
model.config.get_project_name() + + '_prj' + + '/solution1/sim/verilog/fifo_opt.vcd' + ) as vcd_file: vcd = VcdParser() vcd.parse(vcd_file) data = vcd.scope.toJson() @@ -38,13 +45,14 @@ def generate_max_depth_file(model, maxs): def set_fifo_depth(model, maxs): - for k, v in model.output_vars.items(): - filtered_max = [x['max'] for x in maxs if v.name in x['name']] - if len(filtered_max) == 0: - continue - if len(filtered_max) > 1: - print('WARNING! Check names of FIFOs') - v.pragma = (v.pragma[0], filtered_max[0] + 1) + for v in model.output_vars.values(): + if v.pragma: + filtered_max = [x['max'] for x in maxs if v.name in x['name']] + if len(filtered_max) == 0: + continue + if len(filtered_max) > 1: + print('WARNING! Check names of FIFOs') + v.pragma = (v.pragma[0], filtered_max[0] + 1) class FifoDepthOptimization(ConfigurableOptimizerPass, ModelOptimizerPass): @@ -57,20 +65,26 @@ def transform(self, model): # check axi-stream or io-stream, if not one the 2 exit if not (model.config.get_config_value('IOType') == 'io_stream'): - raise Exception('To use this optimization you have to set `IOType` field to `io_stream` in the HLS config') + raise RuntimeError('To use this optimization you have to set `IOType` field to `io_stream` in the HLS config') # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs # and so they will be profiled if profiling_fifo_depth: - vars_to_profile = {k: v for k, v in model.output_vars.items() if v != model.get_output_variables()[0] and - v != model.get_input_variables()[0]} + vars_to_profile = { + k: v + for k, v in model.output_vars.items() + if v != model.get_output_variables()[0] and v != model.get_input_variables()[0] + } set_big_fifos(vars_to_profile, profiling_fifo_depth) data = get_vcd_data(model) if len(data['children']) == 0: - print("FIFO depth optimization found no FIFOs implemented using BRAMs in the design, no optimization is possible. Consider increasing profiling_fifo_depth.") + print( + "FIFO depth optimization found no FIFOs implemented using BRAMs in the design, no optimization is possible." 
+ ) + print("Consider increasing profiling_fifo_depth.") return False n_elem = len(data['children'][0]['children'][0]['children']) diff --git a/hls4ml/backends/vivado/passes/transform_types.py b/hls4ml/backends/vivado/passes/transform_types.py index cb8a738e0f..3462578e74 100644 --- a/hls4ml/backends/vivado/passes/transform_types.py +++ b/hls4ml/backends/vivado/passes/transform_types.py @@ -1,32 +1,45 @@ - +from hls4ml.backends.fpga.fpga_types import ( + APTypeConverter, + HLSTypeConverter, + StaticWeightVariableConverter, + VivadoArrayVariableConverter, + VivadoInplaceArrayVariableConverter, + VivadoInplaceStreamVariableConverter, + VivadoStreamVariableConverter, +) from hls4ml.model.optimizer import GlobalOptimizerPass -from hls4ml.model.types import InplaceVariable -from hls4ml.backends.fpga.fpga_types import APTypeConverter, HLSTypeConverter, StaticWeightVariableConverter, VivadoArrayVariableConverter, VivadoInplaceVariableConverter, VivadoStreamVariableConverter +from hls4ml.model.types import InplaceTensorVariable class TransformTypes(GlobalOptimizerPass): def __init__(self): self.type_converter = HLSTypeConverter(precision_converter=APTypeConverter()) self.array_var_converter = VivadoArrayVariableConverter(type_converter=self.type_converter) + self.inplace_array_var_converter = VivadoInplaceArrayVariableConverter(type_converter=self.type_converter) self.stream_var_converter = VivadoStreamVariableConverter(type_converter=self.type_converter) + self.inplace_stream_var_converter = VivadoInplaceStreamVariableConverter(type_converter=self.type_converter) self.weight_var_converter = StaticWeightVariableConverter(type_converter=self.type_converter) - self.inplace_var_converter = VivadoInplaceVariableConverter(type_converter=self.type_converter) def transform(self, model, node): io_type = node.model.config.get_config_value('IOType') for out_name, var in node.variables.items(): - if isinstance(var, InplaceVariable): - new_var = self.inplace_var_converter.convert(var, io_type) if io_type == 'io_stream': - new_var = self.stream_var_converter.convert(var) + if isinstance(var, InplaceTensorVariable): + new_var = self.inplace_stream_var_converter.convert(var) + else: + new_var = self.stream_var_converter.convert(var) + elif io_type == 'io_serial': + new_var = self.array_var_converter.convert(var, pragma='stream') elif io_type == 'io_parallel': - if node.name in node.model.inputs: + if out_name in node.model.inputs: new_var = self.array_var_converter.convert(var, pragma='reshape') + elif isinstance(var, InplaceTensorVariable): + new_var = self.inplace_array_var_converter.convert(var, pragma='') else: new_var = self.array_var_converter.convert(var, pragma='partition') else: - raise Exception('Unknown IOType {} in {} ({})'.format(io_type, node.name, node.__class__.__name__)) + raise Exception(f'Unknown IOType {io_type} in {node.name} ({node.__class__.__name__})') node.set_attr(out_name, new_var) @@ -36,4 +49,4 @@ def transform(self, model, node): for t_name, type in node.types.items(): new_type = self.type_converter.convert(type) - node.set_attr(t_name, new_type) \ No newline at end of file + node.set_attr(t_name, new_type) diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index 7698bc680d..743754d14d 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -93,7 +93,13 @@ def _register_flows(self): ] quantization_flow = register_flow('quantization', quantization_passes, requires=[init_flow], 
backend=self.name) - optimization_passes = ['vivado:remove_final_reshape', 'vivado:optimize_pointwise_conv', 'vivado:skip_softmax'] + optimization_passes = [ + 'vivado:remove_final_reshape', + 'vivado:optimize_pointwise_conv', + 'vivado:inplace_parallel_reshape', + 'vivado:inplace_stream_flatten', + 'vivado:skip_softmax', + ] optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name) vivado_types = [ diff --git a/hls4ml/model/attributes.py b/hls4ml/model/attributes.py index 71b9b45441..148e751572 100644 --- a/hls4ml/model/attributes.py +++ b/hls4ml/model/attributes.py @@ -1,11 +1,12 @@ from collections.abc import MutableMapping +from numbers import Integral -from hls4ml.model.types import InplaceVariable, NamedType, Source, TensorVariable, WeightVariable +from hls4ml.model.types import NamedType, Source, TensorVariable, WeightVariable from hls4ml.utils.string_utils import convert_to_pascal_case class Attribute: - def __init__(self, name, value_type=int, default=None, configurable=False): + def __init__(self, name, value_type=Integral, default=None, configurable=False): self.name = name self.value_type = value_type self.default = default @@ -78,7 +79,7 @@ def __iter__(self): yield from self.attributes.keys() def __setitem__(self, key, value): - if isinstance(value, (TensorVariable, InplaceVariable)): + if isinstance(value, TensorVariable): self.layer.model.register_output_variable(key, value) self.attributes['result_t'] = value.type if key in self._expected_attributes and key in self.layer.outputs: @@ -121,7 +122,7 @@ def __init__(self, attributes): class VariableMapping(AttributeMapping): def __init__(self, attributes): - super().__init__(attributes, (TensorVariable, InplaceVariable)) + super().__init__(attributes, TensorVariable) def __getitem__(self, key): if 'out_' + key in self.attributes: diff --git a/hls4ml/model/graph.py b/hls4ml/model/graph.py index 650e571022..c695f0bc45 100644 --- a/hls4ml/model/graph.py +++ b/hls4ml/model/graph.py @@ -316,9 +316,18 @@ def __init__(self, config, data_reader, layer_list, inputs=None, outputs=None): self._applied_flows = [] - # If not provided, assumes layer_list[0] is input, and layer_list[-1] is output - self.inputs = inputs if inputs is not None else [layer_list[0]['name']] - self.outputs = outputs if outputs is not None else [layer_list[-1]['name']] + # If not provided, assumes layer_list[0] is the input layer, and layer_list[-1] is output layer + + # Note, these are actually the variable names, which may differ from the layer name + input_layers = inputs if inputs is not None else [layer_list[0]['name']] + output_layers = outputs if outputs is not None else [layer_list[-1]['name']] + self.inputs = self._find_output_variable_names(layer_list, input_layers) + if self.inputs != input_layers: + raise RuntimeError( + "Currently only support the case when input variables and input layer names match\n" + + f"Input layers = {input_layers}, input_vars = {self.inputs}" + ) + self.outputs = self._find_output_variable_names(layer_list, output_layers) self.index = 0 self.graph = OrderedDict() @@ -331,6 +340,13 @@ def __init__(self, config, data_reader, layer_list, inputs=None, outputs=None): for flow in self.config.flows: self.apply_flow(flow) + def _find_output_variable_names(self, layer_list, layer_names): + """Given a list of all layers, and a list input/output names, find the names of the their outputs that will be used + as the name of the output variables.""" + inout_nodes = [node for node in 
layer_list if node['name'] in layer_names] + all_node_output_names = [node['outputs'] if 'outputs' in node else [node['name']] for node in inout_nodes] + return [output for node_output_names in all_node_output_names for output in node_output_names] # to flatten + def _make_graph(self, layer_list): for layer in layer_list: kind = layer['class_name'] @@ -504,9 +520,11 @@ def remove_node(self, node, rewire=True): """ if rewire: - if len(node.inputs) > 1 or len(node.outputs) > 1: + inputs = [inp for inp in node.inputs if inp] + outputs = [outp for outp in node.outputs if outp] + if len(inputs) > 1 or len(outputs) > 1: raise Exception('Cannot rewire a node with multiple inputs/outputs') - prev_node = self.graph.get(node.inputs[0]) + prev_node = node.get_input_node(node.inputs[0]) next_nodes = [x for x in self.graph.values() if node.outputs[0] in x.inputs] if prev_node is not None: if len(next_nodes) > 0: diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 77c874f589..c83cae24b9 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -1,3 +1,5 @@ +import typing + import numpy as np from hls4ml.model.attributes import ( @@ -17,7 +19,6 @@ ExponentPrecisionType, ExponentWeightVariable, FixedPrecisionType, - InplaceVariable, IntegerPrecisionType, NamedType, TensorVariable, @@ -166,14 +167,16 @@ def _set_accum_t(self): self.set_attr('accum_t', accum_t) def get_input_node(self, input_name=None): - if input_name is not None: - nodes = [node for node in self.model.graph.values() if input_name in node.outputs] - if len(nodes) == 0: - return None + if input_name is None: + if len(self.inputs) > 0: + input_name = self.inputs[0] else: - return nodes[0] + return None + nodes = [node for node in self.model.graph.values() if input_name in node.outputs] + if len(nodes) == 0: + return None else: - return self.model.graph.get(self.inputs[0]) + return nodes[0] def get_input_variable(self, input_name=None): if input_name is not None: @@ -359,17 +362,43 @@ def initialize(self): class Reshape(Layer): + _expected_attributes = [ + Attribute('target_shape', value_type=typing.Sequence), + ] + def initialize(self): - shape = self.attributes['target_shape'] - if shape[0] is None: - shape = shape[1:] - dims = [f'N_SIZE_{i}_{self.index}' for i in range(1, len(shape) + 1)] + input_shape = self.get_input_variable(self.inputs[0]).shape + target_shape = self.get_attr('target_shape') + if target_shape is None: + # need to get it from the input + shape_node = self.get_input_node(self.inputs[1]) + # for QONNX, remove batch dimension + if shape_node: + target_shape = shape_node.value[1:] + else: + raise RuntimeError("Reshape for ONNX requires the target shape to be a second input.") - out_name = self.outputs[0] - proxy = self.get_input_variable() - out = InplaceVariable(shape, dims, proxy) + # remove Nones -- is this ever triggered? 
+ if target_shape[0] is None: + target_shape = target_shape[1:] - self.set_attr(out_name, out) + # take care of -1 shapes + shape = self._infer_output_shape(input_shape, target_shape) + + # update the target shape with chnges from above + self.set_attr('target_shape', shape) + + dims = [f'N_SIZE_{i}_{self.index}' for i in range(len(shape))] + + self.add_output_variable(shape, dims) + + def _infer_output_shape(self, input_shape, target_shape): + """Expand the shape that potentially includes -1 as one of the dimensions.""" + if -1 in target_shape: # Need to infer shape for -1 + dummy_x = np.ones(input_shape) + dummy_y = np.reshape(dummy_x, target_shape) + return list(dummy_y.shape) + return target_shape class Dense(Layer): diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index 115ff5cce0..208470df08 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -1,9 +1,12 @@ -from enum import Enum import re +from enum import Enum + import numpy as np +# region Quantizer definition -class Quantizer(object): + +class Quantizer: def __init__(self, bits, hls_type): self.bits = bits self.hls_type = hls_type @@ -11,6 +14,12 @@ def __init__(self, bits, hls_type): def __call__(self, data): raise NotImplementedError + +# endregion + +# region Precision types + + class RoundingMode(Enum): TRN = 1 TRN_ZERO = 2 @@ -30,6 +39,7 @@ def from_string(cls, mode): return cls[mode] + class SaturationMode(Enum): WRAP = 1 SAT = 2 @@ -46,11 +56,13 @@ def from_string(cls, mode): return cls[mode] -class PrecisionType(object): + +class PrecisionType: def __init__(self, width, signed): self.width = width self.signed = signed + class IntegerPrecisionType(PrecisionType): def __init__(self, width=16, signed=True): super().__init__(width=width, signed=signed) @@ -69,11 +81,12 @@ def __eq__(self, other): eq = eq and self.fractional == other.fractional return eq + class FixedPrecisionType(PrecisionType): def __init__(self, width=16, integer=6, signed=True, rounding_mode=None, saturation_mode=None, saturation_bits=None): super().__init__(width=width, signed=signed) self.integer = integer - self.fractional = width-integer + self.fractional = width - integer self.rounding_mode = rounding_mode self.saturation_mode = saturation_mode self.saturation_bits = saturation_bits @@ -116,34 +129,40 @@ def __eq__(self, other): eq = eq and self.saturation_bits == other.saturation_bits return eq + class XnorPrecisionType(IntegerPrecisionType): ''' Convenience class to differentiate 'regular' integers from BNN Xnor ones ''' + def __init__(self): super().__init__(width=1, signed=False) + class ExponentPrecisionType(IntegerPrecisionType): ''' - Convenience class to differentiate 'regular' integers from those which represent exponents, for QKeras po2 quantizers, for example. + Convenience class to differentiate 'regular' integers from those which represent exponents, + for QKeras po2 quantizers, for example. 
''' + def __init__(self, width=16, signed=True): super().__init__(width=width, signed=signed) + def find_minimum_width(data, signed=True): """ Helper function to find the minimum integer width to express all entries in the data array without saturation / overflow """ maxdata = np.amax(np.abs(data)) - if maxdata == 0.: + if maxdata == 0.0: # fringe case (amax(abs(data)) == 0 -> data is uniformly zero) return 1 log2max = np.log2(maxdata) iwidth = max(0, int(np.ceil(log2max))) - if iwidth == int(np.floor(log2max)): # is a power-of-two integer -> need one extra bit + if iwidth == int(np.floor(log2max)): # is a power-of-two integer -> need one extra bit iwidth += 1 if signed: @@ -152,28 +171,37 @@ def find_minimum_width(data, signed=True): return iwidth -class NamedType(object): + +# endregion + +# region Data type definitions + + +class NamedType: def __init__(self, name, precision, **kwargs): self.name = name.format(**kwargs) self.precision = precision + class CompressedType(NamedType): def __init__(self, name, precision, index_precision, **kwargs): if not name.startswith('compressed_'): name = 'compressed_' + name - super(CompressedType, self).__init__(name, precision, **kwargs) + super().__init__(name, precision, **kwargs) self.index_precision = index_precision + class ExponentType(NamedType): def __init__(self, name, precision, **kwargs): if not name.startswith('exponent_'): name = 'exponent_' + name - super(ExponentType, self).__init__(name, precision, **kwargs) + super().__init__(name, precision, **kwargs) self.sign = XnorPrecisionType() + class PackedType(NamedType): def __init__(self, name, precision, n_elem, n_pack, **kwargs): - super(PackedType, self).__init__(name, precision, **kwargs) + super().__init__(name, precision, **kwargs) self.n_elem = n_elem if n_pack < 0: self.n_pack = -n_pack @@ -182,14 +210,21 @@ def __init__(self, name, precision, n_elem, n_pack, **kwargs): self.n_pack = n_pack self.unpack = False -class Variable(object): + +# endregion + +# region Variables + + +class Variable: def __init__(self, var_name, atype, **kwargs): self.name = var_name.format(**kwargs) self.type = atype + class TensorVariable(Variable): def __init__(self, shape, dim_names, var_name='layer{index}', type_name='layer{index}_t', precision=None, **kwargs): - super(TensorVariable, self).__init__(var_name, NamedType(type_name, precision, **kwargs), **kwargs) + super().__init__(var_name, NamedType(type_name, precision, **kwargs), **kwargs) self.shape = shape self.dim_names = dim_names @@ -203,29 +238,26 @@ def size(self): return nelem def size_cpp(self): - #TODO get rid of size_cpp() (and dim_names) + # TODO get rid of size_cpp() (and dim_names) return '*'.join([str(k) for k in self.dim_names]) -class InplaceVariable(Variable): - def __init__(self, shape, dim_names, proxy): - self.shape = shape - self.dim_names = dim_names - self.type = proxy.type - self.name = proxy.name - self.size = proxy.size - def get_shape(self): - return zip(self.dim_names, self.shape) +class InplaceTensorVariable(TensorVariable): + '''A TensorVariable that is just a link to another''' - def size_cpp(self): - return '*'.join([str(k) for k in self.dim_names]) + def __init__(self, tv, input_var): + ''' + Always created with a passed in TensorVariable tv + and the input_var variable it should link to. 
+ ''' + self.__dict__.update(tv.__dict__) + self.type = input_var.type + self.input_var = input_var - def definition_cpp(self, name_suffix='', as_reference=False): - return None class WeightVariable(Variable): def __init__(self, var_name, type_name, precision, data, quantizer=None, **kwargs): - super(WeightVariable, self).__init__(var_name, NamedType(type_name, precision, **kwargs), **kwargs) + super().__init__(var_name, NamedType(type_name, precision, **kwargs), **kwargs) self.data = data self.nzeros = -1 self.shape = list(self.data.shape) @@ -264,20 +296,21 @@ def update_precision(self, new_precision): width_bits = int(precision_bits[0]) integer_bits = int(precision_bits[1]) fractional_bits = integer_bits - width_bits - lsb = 2 ** fractional_bits + lsb = 2**fractional_bits if lsb < 1: # Use str to represent the float with digits, get the length # to right of decimal point decimal_spaces = len(str(lsb).split('.')[1]) else: decimal_spaces = len(str(2**integer_bits)) - self.precision_fmt = '%.{}f'.format(decimal_spaces) + self.precision_fmt = f'%.{decimal_spaces}f' else: self.precision_fmt = '%f' + class CompressedWeightVariable(WeightVariable): def __init__(self, var_name, type_name, precision, data, reuse_factor, quantizer=None, **kwargs): - super(CompressedWeightVariable, self).__init__(var_name, type_name, precision, data, quantizer=quantizer, **kwargs) + super().__init__(var_name, type_name, precision, data, quantizer=quantizer, **kwargs) self.extra_zeros = 0 self.data_length = np.prod(data.shape) - self.nzeros while self.data_length % reuse_factor != 0: @@ -321,9 +354,10 @@ def __next__(self): next = __next__ + class ExponentWeightVariable(WeightVariable): def __init__(self, var_name, type_name, precision, data, quantizer=None, **kwargs): - super(ExponentWeightVariable, self).__init__(var_name, type_name, precision, data, quantizer, **kwargs) + super().__init__(var_name, type_name, precision, data, quantizer, **kwargs) ''' WeightVariable for Exponent aka po2 data. The data should already by quantized by the quantizer. 
''' @@ -336,7 +370,7 @@ def _format(self): sign = np.where(y < 0, np.zeros_like(y), np.ones_like(y)) # Take the logarithm, since this is what we will write to the header # for the optimized product using shifts - y = (np.log2(np.abs(y)) / np.log2(2.)).astype('int') + y = (np.log2(np.abs(y)) / np.log2(2.0)).astype('int') return np.stack((sign, y), axis=-1) def __iter__(self): @@ -351,9 +385,18 @@ def __next__(self): next = __next__ -class Source(object): + +# endregion + +# region Custom source + + +class Source: def __init__(self, code): self.code = code - + def __str__(self): return str(self.code) + + +# endregion diff --git a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_stream.h b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_stream.h index 45e821adc5..b5b55e2045 100644 --- a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_stream.h +++ b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_stream.h @@ -6,22 +6,22 @@ namespace nnet { struct broadcast_config { - static const unsigned in_height = 10; - static const unsigned in_width = 10; - static const unsigned n_chan = 1; - static const unsigned n_dupl = 2; + static const unsigned in_height = 10; + static const unsigned in_width = 10; + static const unsigned n_chan = 1; + static const unsigned n_dupl = 2; }; -template +template void clone_stream(stream &data, stream &res1, stream &res2) { - CloneLoop: +CloneLoop: #pragma ii 1 for (int i = 0; i < N / data_T::size; i++) { data_T in_data = data.read(); res_T out_data1; res_T out_data2; - - ClonePack: + + ClonePack: #pragma unroll for (int j = 0; j < data_T::size; j++) { out_data1[j] = in_data[j]; @@ -33,9 +33,9 @@ void clone_stream(stream &data, stream &res1, stream &res2 } } -template +template void clone_stream(stream &data, stream &res1, stream &res2, stream &res3) { - CloneLoop: +CloneLoop: #pragma ii 1 for (int i = 0; i < N / data_T::size; i++) { data_T in_data = data.read(); @@ -43,7 +43,7 @@ void clone_stream(stream &data, stream &res1, stream &res2 res_T out_data2; res_T out_data3; - ClonePack: + ClonePack: #pragma unroll for (int j = 0; j < data_T::size; j++) { out_data1[j] = in_data[j]; @@ -57,6 +57,65 @@ void clone_stream(stream &data, stream &res1, stream &res2 } } +template void repack_stream(stream &data, stream &res) { + if (data_T::size == res_T::size) { + #pragma ii 1 + for (int i = 0; i < N / data_T::size; i++) { + + data_T in_data = data.read(); + res_T out_data; + + #pragma unroll + for (int j = 0; j < data_T::size; j++) { + out_data[j] = in_data[j]; + } + + res.write(out_data); + } + } else if (data_T::size > res_T::size) { + constexpr unsigned pack_diff = data_T::size / res_T::size; + + for (int i = 0; i < N / data_T::size; i++) { + + data_T in_data = data.read(); + res_T out_data; + + #pragma ii 1 + for (int j = 0; j < pack_diff; j++) { + + res_T out_data; + + #pragma unroll + for (int k = 0; k < res_T::size; k++) { + out_data[k] = in_data[j * res_T::size + k]; + } + res.write(out_data); + } + } + } else { // data_T::size < res_T::size + res_T out_data; + constexpr unsigned pack_diff = res_T::size / data_T::size; + unsigned pack_cnt = 0; + #pragma ii 1 + for (int i = 0; i < N / data_T::size; i++) { + + data_T in_data = data.read(); + + #pragma unroll + for (int j = 0; j < data_T::size; j++) { + out_data[pack_cnt * data_T::size + j] = in_data[j]; + } + + if (pack_cnt == pack_diff - 1) { + res.write(out_data); + pack_cnt = 0; + } else { + pack_cnt++; + } + } + } } +} // namespace nnet + #endif diff --git a/test/pytest/test_reshape.py b/test/pytest/test_reshape.py 
new file mode 100755 index 0000000000..3c421c1474 --- /dev/null +++ b/test/pytest/test_reshape.py @@ -0,0 +1,49 @@ +""" Test that reshape is properly handled by optimizers. +""" + +from pathlib import Path + +import numpy as np +import pytest +import tensorflow as tf + +import hls4ml + +test_root_path = Path(__file__).parent + + +def randX(batch_size, N): + return np.random.rand(batch_size, N) + + +@pytest.fixture(scope='module') +def randX_20_10(): + return randX(20, 10) + + +@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) +@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) +def test_reshape_parallel(randX_20_10, backend, io_type): + model = tf.keras.models.Sequential( + [ + tf.keras.layers.Input(shape=(10,)), + tf.keras.layers.Dense(10 * 3), + tf.keras.layers.Reshape((10, 3)), + tf.keras.layers.ReLU(), + ] + ) + model.compile(optimizer='adam', loss='mse') + config = hls4ml.utils.config_from_keras_model(model) + prj_name = f'hls4mlprj_reshape_{backend}_{io_type}' + output_dir = str(test_root_path / prj_name) + hls_model = hls4ml.converters.convert_from_keras_model( + model, hls_config=config, output_dir=output_dir, io_type=io_type, backend=backend + ) + hls_model.compile() + + X = randX_20_10 + y_qkeras = model.predict(X) + y_hls4ml = hls_model.predict(X) + + # check that the values are close + np.testing.assert_allclose(y_qkeras.ravel(), y_hls4ml.ravel(), atol=0.02)
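For reference, the `-1` handling that this patch adds to `Reshape.initialize` leans on NumPy to expand the wildcard dimension. Below is a minimal standalone sketch of that trick; the function name `infer_output_shape` and the example shapes are illustrative only, mirroring the `Reshape._infer_output_shape` helper introduced above rather than reproducing it verbatim.

import numpy as np

def infer_output_shape(input_shape, target_shape):
    # Expand a target shape that may contain -1, as Reshape._infer_output_shape does.
    if -1 in target_shape:
        # Reshape a dummy array of the input shape; NumPy resolves the -1 dimension.
        dummy = np.ones(input_shape)
        return list(np.reshape(dummy, target_shape).shape)
    return list(target_shape)

print(infer_output_shape((10, 3), (-1,)))    # [30]  -- flatten, as in InplaceStreamFlatten's target
print(infer_output_shape((10, 3), (5, -1)))  # [5, 6]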