diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py index 3385b95eec..4e0fe5fbb3 100644 --- a/hls4ml/backends/fpga/fpga_backend.py +++ b/hls4ml/backends/fpga/fpga_backend.py @@ -59,28 +59,48 @@ def write(self, model): def get_writer_flow(self): raise NotImplementedError - def get_valid_reuse_factors(self, layer): - n_in = 0 - n_out = 0 + def get_layer_mult_size(self, layer): if 'Dense' in layer.class_name: n_in = layer.get_attr('n_in') n_out = layer.get_attr('n_out') - elif 'Conv1D' in layer.class_name: + return n_in, n_out + + if 'Conv1D' in layer.class_name: n_in = layer.get_attr('n_chan') * layer.get_attr('filt_width') n_out = layer.get_attr('n_filt') - elif 'Conv2D' in layer.class_name: + return n_in, n_out + + if 'Conv2D' in layer.class_name: n_in = layer.get_attr('n_chan') * layer.get_attr('filt_height') * layer.get_attr('filt_width') n_out = layer.get_attr('n_filt') + return n_in, n_out + + if 'LSTM' in layer.class_name: + n_in = layer.get_attr('n_in') + n_out = layer.get_attr('n_out') * 4 + n_in_recr = layer.get_attr('n_out') + n_out_recr = n_out + return n_in, n_out, n_in_recr, n_out_recr + + if 'GRU' in layer.class_name: + n_in = layer.get_attr('n_in') + n_out = layer.get_attr('n_out') * 3 + n_in_recr = layer.get_attr('n_out') + n_out_recr = n_out + return n_in, n_out, n_in_recr, n_out_recr + + raise Exception(f'Cannot get mult size for layer {layer.name} ({layer.class_name})') + def get_valid_reuse_factors(self, n_in, n_out): max_rf = n_in * n_out valid_reuse_factors = [] for rf in range(1, max_rf + 1): - _assert = self._check_conditions(n_in, n_out, rf) + _assert = self._validate_reuse_factor(n_in, n_out, rf) if _assert: valid_reuse_factors.append(rf) return valid_reuse_factors - def _check_conditions(self, n_in, n_out, rf): + def _validate_reuse_factor(self, n_in, n_out, rf): multfactor = min(n_in, rf) multiplier_limit = int(math.ceil((n_in * n_out) / float(multfactor))) # @@ -112,16 +132,19 @@ def get_closest_reuse_factor(self, valid_rf, chosen_rf): else: return before - def set_closest_reuse_factor(self, layer): - valid_rf = self.get_valid_reuse_factors(layer) - chosen_rf = layer.get_attr('reuse_factor') + def set_closest_reuse_factor(self, layer, n_in, n_out, attribute='reuse_factor'): + assert attribute is not None, 'Reuse factor attribute cannot be None' + + valid_rf = self.get_valid_reuse_factors(n_in, n_out) + chosen_rf = layer.get_attr(attribute) if chosen_rf not in valid_rf: closest_rf = self.get_closest_reuse_factor(valid_rf, chosen_rf) print('WARNING: Invalid ReuseFactor={} in layer "{}". Using ReuseFactor={} instead. Valid ReuseFactor(s): {}.' 
.format(chosen_rf, layer.name, closest_rf, ','.join(map(str, valid_rf)))) - layer.set_attr('reuse_factor', closest_rf) + layer.set_attr(attribute, closest_rf) def set_target_reuse_factor(self, layer): + # TODO update target reuse factor for the RNN layers targ_cycles = layer.get_attr('target_cycles') shuffle_cycles = 6 # Number of clock cycles to move data around diff --git a/hls4ml/backends/quartus/quartus_backend.py b/hls4ml/backends/quartus/quartus_backend.py index 9cb4f44bf4..1066188690 100644 --- a/hls4ml/backends/quartus/quartus_backend.py +++ b/hls4ml/backends/quartus/quartus_backend.py @@ -156,7 +156,8 @@ def init_dense(self, layer): if layer.model.config.get_compression(layer): layer.set_attr('strategy', 'compressed') else: - self.set_closest_reuse_factor(layer) + n_in, n_out = self.get_layer_mult_size(layer) + self.set_closest_reuse_factor(layer, n_in, n_out) self.gen_quartus_weight_array(layer) layer.set_attr('strategy', 'resource') diff --git a/hls4ml/backends/vivado/passes/core_templates.py b/hls4ml/backends/vivado/passes/core_templates.py index 201562f7fb..bd6ea21460 100644 --- a/hls4ml/backends/vivado/passes/core_templates.py +++ b/hls4ml/backends/vivado/passes/core_templates.py @@ -1,6 +1,6 @@ from hls4ml.backends.backend import get_backend -from hls4ml.model.layers import Activation, BatchNormalization, Dense, PReLU, ParametrizedActivation, Softmax +from hls4ml.model.layers import Activation, BatchNormalization, Dense, Embedding, PReLU, ParametrizedActivation, Softmax from hls4ml.backends.template import LayerConfigTemplate, FunctionCallTemplate # Dense templates @@ -174,3 +174,39 @@ def format(self, node): params['config'] = '{}_config{}'.format(node.get_attr('activation'), node.index) return self.template.format(**params) + + +# Embedding templates + +embed_config_template = """struct config{index} : nnet::embed_config {{ + static const unsigned n_in = {n_in}; + static const unsigned n_out = {n_out}; + static const unsigned vocab_size = {vocab_size}; + static const unsigned io_type = nnet::{iotype}; + static const unsigned reuse_factor = {reuse}; + typedef {embeddings_t.name} embeddings_t; +}};\n""" + +embed_function_template = 'nnet::embedding<{input_t}, {output_t}, {config}>({input}, {output}, {e});' + +embed_include_list = ['nnet_utils/nnet_embed.h', 'nnet_utils/nnet_embed_stream.h'] + +class EmbeddingConfigTemplate(LayerConfigTemplate): + def __init__(self): + super().__init__(Embedding) + self.template = embed_config_template + + def format(self, node): + params = self._default_config_params(node) + return self.template.format(**params) + +class EmbeddingFunctionTemplate(FunctionCallTemplate): + def __init__(self): + super().__init__(Embedding, include_header=embed_include_list) + self.template = embed_function_template + + def format(self, node): + params = self._default_function_params(node) + params['e'] = node.get_weights('embeddings').name + + return self.template.format(**params) diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index a980d33ec9..0140e55c15 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -7,7 +7,7 @@ from collections.abc import Iterable from hls4ml.model.types import FixedPrecisionType, NamedType, IntegerPrecisionType -from hls4ml.model.layers import Layer, Dense, BatchNormalization, Conv1D, Conv2D, Conv2DBatchnorm, SeparableConv1D, SeparableConv2D, DepthwiseConv2D, Activation, ParametrizedActivation, PReLU, Softmax, Pooling1D, Pooling2D, 
GlobalPooling1D, GlobalPooling2D, ZeroPadding1D, ZeroPadding2D, Merge, Concatenate, Dot, Resize, Transpose, GarNet, GarNetStack +from hls4ml.model.layers import Layer, Dense, BatchNormalization, Embedding, Conv1D, Conv2D, Conv2DBatchnorm, SeparableConv1D, SeparableConv2D, DepthwiseConv2D, Activation, ParametrizedActivation, PReLU, Softmax, Pooling1D, Pooling2D, GlobalPooling1D, GlobalPooling2D, ZeroPadding1D, ZeroPadding2D, Merge, Concatenate, Dot, Resize, Transpose, SimpleRNN, LSTM, GRU, GarNet, GarNetStack from hls4ml.model.attributes import Attribute from hls4ml.model.optimizer import get_backend_passes, layer_optimizer, model_optimizer from hls4ml.model.flow import register_flow @@ -18,8 +18,17 @@ class VivadoBackend(FPGABackend): def __init__(self): super(VivadoBackend, self).__init__('Vivado') + self._register_layer_attributes() self._register_flows() + def _register_layer_attributes(self): + extended_attrs = { + SimpleRNN: [Attribute('recurrent_reuse_factor', default=1)], + LSTM: [Attribute('recurrent_reuse_factor', default=1)], + GRU: [Attribute('recurrent_reuse_factor', default=1)], + } + self.attribute_map.update(extended_attrs) + def _register_flows(self): initializers = self._get_layer_initializers() init_flow = register_flow('init_layers', initializers, requires=['optimize'], backend=self.name) @@ -123,8 +132,9 @@ def init_dense(self, layer): index_t = IntegerPrecisionType(width=1, signed=False) compression = layer.model.config.get_compression(layer) if layer.model.config.is_resource_strategy(layer): + n_in, n_out = self.get_layer_mult_size(layer) self.set_target_reuse_factor(layer) - self.set_closest_reuse_factor(layer) + self.set_closest_reuse_factor(layer, n_in, n_out) if compression: layer.set_attr('strategy', 'compressed') index_t = layer.get_weights('weight').type.index_precision @@ -142,8 +152,9 @@ def init_conv1d(self, layer): if layer.model.config.is_resource_strategy(layer): layer.set_attr('strategy', 'resource') + n_in, n_out = self.get_layer_mult_size(layer) self.set_target_reuse_factor(layer) - self.set_closest_reuse_factor(layer) + self.set_closest_reuse_factor(layer, n_in, n_out) else: layer.set_attr('strategy', 'latency') @@ -153,7 +164,8 @@ def init_conv1d(self, layer): def init_sepconv1d(self, layer): if layer.model.config.is_resource_strategy(layer): layer.set_attr('strategy', 'resource') - self.set_closest_reuse_factor(layer) + n_in, n_out = self.get_layer_mult_size(layer) + self.set_closest_reuse_factor(layer, n_in, n_out) else: layer.set_attr('strategy', 'latency') @@ -167,7 +179,8 @@ def init_conv2d(self, layer): if layer.model.config.is_resource_strategy(layer): layer.set_attr('strategy', 'resource') self.set_target_reuse_factor(layer) - self.set_closest_reuse_factor(layer) + n_in, n_out = self.get_layer_mult_size(layer) + self.set_closest_reuse_factor(layer, n_in, n_out) else: layer.set_attr('strategy', 'latency') @@ -177,7 +190,8 @@ def init_conv2d(self, layer): def init_sepconv2d(self, layer): if layer.model.config.is_resource_strategy(layer): layer.set_attr('strategy', 'resource') - self.set_closest_reuse_factor(layer) + n_in, n_out = self.get_layer_mult_size(layer) + self.set_closest_reuse_factor(layer, n_in, n_out) else: layer.set_attr('strategy', 'latency') @@ -187,7 +201,8 @@ def init_sepconv2d(self, layer): def init_depconv2d(self, layer): if layer.model.config.is_resource_strategy(layer): layer.set_attr('strategy', 'resource') - self.set_closest_reuse_factor(layer) + n_in, n_out = self.get_layer_mult_size(layer) + 
self.set_closest_reuse_factor(layer, n_in, n_out) else: layer.set_attr('strategy', 'latency') @@ -215,6 +230,64 @@ def init_softmax(self, layer): if layer.model.config.get_config_value('IOType') == 'io_parallel': assert len(layer.get_input_variable().shape) == 1, 'Softmax with io_parallel strategy cannot be used on multidimensional tensors.' + @layer_optimizer(Embedding) + def init_embed(self, layer): + if layer.attributes['n_in'] is None: + raise Exception('Input length of Embedding layer must be specified.') + + @layer_optimizer(LSTM) + def init_lstm(self, layer): + # TODO Allow getting recurrent reuse factor from the config + reuse_factor = layer.model.config.get_reuse_factor(layer) + layer.set_attr('recurrent_reuse_factor', reuse_factor) + + recurrent_bias = np.zeros(layer.weights['recurrent_weight'].shape[1]) + layer.add_weights_variable(name='recurrent_bias', var_name='br{index}', data=recurrent_bias) + + index_t = IntegerPrecisionType(width=1, signed=False) + + if 'table_t' not in layer.attributes: + layer.set_attr('table_t', FixedPrecisionType(width=18, integer=8)) + if 'table_size' not in layer.attributes: + layer.set_attr('table_size', 1024) + if layer.model.config.is_resource_strategy(layer): + n_in, n_out, n_in_recr, n_out_recr = self.get_layer_mult_size(layer) + self.set_closest_reuse_factor(layer, n_in, n_out) + self.set_closest_reuse_factor(layer, n_in_recr, n_out_recr, attribute='recurrent_reuse_factor') + layer.weights['weight'].data = np.transpose(layer.weights['weight'].data) + layer.weights['recurrent_weight'].data = np.transpose(layer.weights['recurrent_weight'].data) + layer.set_attr('strategy', 'resource') + else: + layer.set_attr('strategy', 'latency') + + layer.set_attr('index_t', index_t) + + @layer_optimizer(GRU) + def init_gru(self, layer): + reuse_factor = layer.model.config.get_reuse_factor(layer) + layer.set_attr('recurrent_reuse_factor', reuse_factor) + + recurrent_bias = np.zeros(layer.weights['recurrent_weight'].shape[1]) + layer.add_weights_variable(name='recurrent_bias', var_name='br{index}', data=recurrent_bias) + + index_t = IntegerPrecisionType(width=1, signed=False) + + if 'table_t' not in layer.attributes: + layer.set_attr('table_t', FixedPrecisionType(width=18, integer=8)) + if 'table_size' not in layer.attributes: + layer.set_attr('table_size', 1024) + if layer.model.config.is_resource_strategy(layer): + n_in, n_out, n_in_recr, n_out_recr = self.get_layer_mult_size(layer) + self.set_closest_reuse_factor(layer, n_in, n_out) + self.set_closest_reuse_factor(layer, n_in_recr, n_out_recr, attribute='recurrent_reuse_factor') + layer.weights['weight'].data = np.transpose(layer.weights['weight'].data) + layer.weights['recurrent_weight'].data = np.transpose(layer.weights['recurrent_weight'].data) + layer.set_attr('strategy', 'resource') + else: + layer.set_attr('strategy', 'latency') + + layer.set_attr('index_t', index_t) + @layer_optimizer(GarNet) def init_garnet(self, layer): reuse_factor = layer.attributes['reuse_factor'] diff --git a/hls4ml/converters/keras/core.py b/hls4ml/converters/keras/core.py index 4e67c6adb1..2593f60976 100644 --- a/hls4ml/converters/keras/core.py +++ b/hls4ml/converters/keras/core.py @@ -127,3 +127,18 @@ def parse_batchnorm_layer(keras_layer, input_names, input_shapes, data_reader, c layer['n_filt']=input_shapes[0][3] return layer, [shape for shape in input_shapes[0]] + + +@keras_handler('Embedding') +def parse_embedding_layer(keras_layer, input_names, input_shapes, data_reader, config): + assert('Embedding' in 
keras_layer['class_name']) + + layer = parse_default_keras_layer(keras_layer, input_names) + + layer['n_in'] = input_shapes[0][1] + layer['vocab_size'] = keras_layer['config']['input_dim'] + layer['n_out'] = keras_layer['config']['output_dim'] + + output_shape = input_shapes[0] + [layer['n_out']] + + return layer, output_shape diff --git a/hls4ml/converters/keras/recurrent.py b/hls4ml/converters/keras/recurrent.py new file mode 100644 index 0000000000..8c7364f76e --- /dev/null +++ b/hls4ml/converters/keras/recurrent.py @@ -0,0 +1,44 @@ +import numpy as np + +from hls4ml.converters.keras_to_hls import parse_default_keras_layer +from hls4ml.converters.keras_to_hls import keras_handler + +from hls4ml.model.types import Quantizer +from hls4ml.model.types import IntegerPrecisionType + +rnn_layers = ['SimpleRNN', 'LSTM', 'GRU'] +@keras_handler(*rnn_layers) +def parse_rnn_layer(keras_layer, input_names, input_shapes, data_reader, config): + assert(keras_layer['class_name'] in rnn_layers) + + layer = parse_default_keras_layer(keras_layer, input_names) + + layer['return_sequences'] = keras_layer['config']['return_sequences'] + layer['return_state'] = keras_layer['config']['return_state'] + + if layer['class_name'] != 'SimpleRNN': + layer['recurrent_activation'] = keras_layer['config']['recurrent_activation'] + + layer['time_major'] = keras_layer['config']['time_major'] if 'time_major' in keras_layer['config'] else False + + # TODO Should we handle time_major? + if layer['time_major']: + raise Exception('Time-major format is not supported by hls4ml'.format(layer['class_name'])) + + layer['n_timesteps'] = input_shapes[0][1] + layer['n_in'] = input_shapes[0][2] + + layer['n_out'] = keras_layer['config']['units'] + + if layer['class_name'] == 'GRU': + layer['apply_reset_gate'] = 'after' if keras_layer['config']['reset_after'] else 'before' + + if layer['return_sequences']: + output_shape = [input_shapes[0][0], layer['n_timesteps'], layer['n_out']] + else: + output_shape = [input_shapes[0][0], layer['n_out']] + + if layer['return_state']: + raise Exception('"return_state" of {} layer is not yet supported.'.format(layer['class_name'])) + + return layer, output_shape \ No newline at end of file diff --git a/hls4ml/converters/keras_to_hls.py b/hls4ml/converters/keras_to_hls.py index df3ede3a60..7353faa138 100644 --- a/hls4ml/converters/keras_to_hls.py +++ b/hls4ml/converters/keras_to_hls.py @@ -229,6 +229,10 @@ def keras_to_hls(config): #Define layers to skip for conversion to HLS skip_layers = ['Dropout'] + # Activation layers + activation_layers = ['Activation', 'LeakyReLU', 'ThresholdedReLU', 'ELU', 'PReLU', 'Softmax', 'TernaryTanh'] + # Recurrent layers + recurrent_layers = ['SimpleRNN', 'LSTM', 'GRU'] #All supported layers supported_layers = get_supported_keras_layers() + skip_layers @@ -310,7 +314,7 @@ def keras_to_hls(config): print('Layer name: {}, layer type: {}, input shapes: {}, output shape: {}'.format(layer['name'], layer['class_name'], input_shapes, output_shape)) layer_list.append( layer ) - if 'activation' in layer and layer['class_name'] not in ['Activation', 'LeakyReLU', 'ThresholdedReLU', 'ELU', 'PReLU', 'Softmax', 'TernaryTanh']:# + qkeras_layers: + if 'activation' in layer and layer['class_name'] not in activation_layers + recurrent_layers:# + qkeras_layers: act_layer = {} act_layer['name'] = layer['name'] + '_' + layer['activation'] act_layer['activation'] = layer['activation'] diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index fb4b7db8a8..7f58446e2e 100644 --- 
a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -825,6 +825,150 @@ def initialize(self): self.set_attr('width', inp.shape[2]) self.add_output_variable(shape, dims, precision=inp.type.precision) +class Embedding(Layer): + _expected_attributes = [ + Attribute('n_in'), + Attribute('n_out'), + Attribute('vocab_size'), + + WeightAttribute('embeddings'), + TypeAttribute('embeddings'), + ] + + def initialize(self): + shape = self.get_input_variable().shape[:] + shape += [self.attributes['n_out']] + if len(shape) > 1: + dims = ['N_LAYER_{}_{}'.format(i, self.index) for i in range(1, len(shape) + 1)] + else: + dims = ['N_LAYER_{}'.format(self.index)] + self.add_output_variable(shape, dims) + + data = self.model.get_weights_data(self.name, 'embeddings') + self.add_weights_variable(name='embeddings', var_name='e{index}', data=data) + +class SimpleRNN(Layer): + _expected_attributes = [ + Attribute('n_out'), + Attribute('activation', value_type=str), + Attribute('return_sequences', value_type=bool, default=False), + Attribute('return_state', value_type=bool, default=False), + ChoiceAttribute('direction', ['forward', 'backward'], default='forward'), + + WeightAttribute('weight'), + WeightAttribute('bias'), + WeightAttribute('recurrent_weight'), + + TypeAttribute('weight'), + TypeAttribute('bias'), + TypeAttribute('recurrent_weight'), + ] + + def initialize(self): + if self.attributes['return_sequences']: + shape = [self.attributes['n_timesteps'], self.attributes['n_out']] + dims = ['N_TIME_STEPS_{}'.format(self.index), 'N_OUT_{}'.format(self.index)] + else: + shape = [self.attributes['n_out']] + dims = ['N_OUT_{}'.format(self.index)] + + self.add_output_variable(shape, dims) + + if self.attributes['return_state']: + state_shape = [self.attributes['n_out']] + state_dims = ['N_OUT_{}'.format(self.index)] + self.add_output_variable(state_shape, state_dims, out_name=self.outputs[1], var_name='layer{index}_h', type_name='layer{index}_h_t') + self.add_output_variable(state_shape, state_dims, out_name=self.outputs[2], var_name='layer{index}_c', type_name='layer{index}_c_t') + + self.add_weights() + self.add_bias() + + recurrent_weight = self.model.get_weights_data(self.name, 'recurrent_kernel') + self.add_weights_variable(name='recurrent_weight', var_name='wr{index}', data=recurrent_weight) + +class LSTM(Layer): + _expected_attributes = [ + Attribute('n_out'), + Attribute('activation', value_type=str), + Attribute('recurrent_activation', value_type=str), + Attribute('return_sequences', value_type=bool, default=False), + Attribute('return_state', value_type=bool, default=False), + ChoiceAttribute('direction', ['forward', 'backward'], default='forward'), + Attribute('time_major', value_type=bool, default=False), + + WeightAttribute('weight'), + WeightAttribute('bias'), + WeightAttribute('recurrent_weight'), + + TypeAttribute('weight'), + TypeAttribute('bias'), + TypeAttribute('recurrent_weight'), + ] + + def initialize(self): + if self.attributes['return_sequences']: + shape = [self.attributes['n_timesteps'], self.attributes['n_out']] + dims = ['N_TIME_STEPS_{}'.format(self.index), 'N_OUT_{}'.format(self.index)] + else: + shape = [self.attributes['n_out']] + dims = ['N_OUT_{}'.format(self.index)] + + self.add_output_variable(shape, dims) + + if self.attributes['return_state']: + state_shape = [self.attributes['n_out']] + state_dims = ['N_OUT_{}'.format(self.index)] + self.add_output_variable(state_shape, state_dims, out_name=self.outputs[1], var_name='layer{index}_h', type_name='layer{index}_h_t') + 
self.add_output_variable(state_shape, state_dims, out_name=self.outputs[2], var_name='layer{index}_c', type_name='layer{index}_c_t') + + self.add_weights() + self.add_bias() + + recurrent_weight = self.model.get_weights_data(self.name, 'recurrent_kernel') + self.add_weights_variable(name='recurrent_weight', var_name='wr{index}', data=recurrent_weight) + +class GRU(Layer): + _expected_attributes = [ + Attribute('n_out'), + Attribute('activation', value_type=str), + Attribute('recurrent_activation', value_type=str), + Attribute('return_sequences', value_type=bool, default=False), + Attribute('return_state', value_type=bool, default=False), + ChoiceAttribute('direction', ['forward', 'backward'], default='forward'), + Attribute('time_major', value_type=bool, default=False), + ChoiceAttribute('apply_reset_gate', ['before', 'after'], default='after'), + + WeightAttribute('weight'), + WeightAttribute('bias'), + WeightAttribute('recurrent_weight'), + + TypeAttribute('weight'), + TypeAttribute('bias'), + TypeAttribute('recurrent_weight'), + ] + + def initialize(self): + if self.attributes['return_sequences']: + shape = [self.attributes['n_timesteps'], self.attributes['n_out']] + dims = ['N_TIME_STEPS_{}'.format(self.index), 'N_OUT_{}'.format(self.index)] + else: + shape = [self.attributes['n_out']] + dims = ['N_OUT_{}'.format(self.index)] + + self.add_output_variable(shape, dims) + + if self.attributes['return_state']: + state_shape = [self.attributes['n_out']] + state_dims = ['N_OUT_{}'.format(self.index)] + self.add_output_variable(state_shape, state_dims, out_name=self.outputs[1], var_name='layer{index}_h', type_name='layer{index}_h_t') + self.add_output_variable(state_shape, state_dims, out_name=self.outputs[2], var_name='layer{index}_c', type_name='layer{index}_c_t') + + self.add_weights() + self.add_bias() + + recurrent_weight = self.model.get_weights_data(self.name, 'recurrent_kernel') + self.add_weights_variable(name='recurrent_weight', var_name='wr{index}', data=recurrent_weight) + class GarNet(Layer): ref_impl = False @@ -1019,6 +1163,10 @@ def _initialize_transforms(self): 'UpSampling1D' : Resize, 'UpSampling2D' : Resize, 'Transpose' : Transpose, + 'Embedding' : Embedding, + 'SimpleRNN' : SimpleRNN, + 'LSTM' : LSTM, + 'GRU' : GRU, 'GarNet' : GarNet, 'GarNetStack' : GarNetStack, # TensorFlow-specific layers: diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_embed.h b/hls4ml/templates/vivado/nnet_utils/nnet_embed.h new file mode 100644 index 0000000000..49a6eed77b --- /dev/null +++ b/hls4ml/templates/vivado/nnet_utils/nnet_embed.h @@ -0,0 +1,47 @@ +#ifndef NNET_EMBED_H_ +#define NNET_EMBED_H_ + +#include "nnet_common.h" +#include "nnet_helpers.h" + +namespace nnet { + +struct embed_config +{ + // Internal data type definitions + typedef float embeddings_t; + + // Layer Sizes + static const unsigned n_in = 10; + static const unsigned n_out = 16; + static const unsigned vocab_size = 50; + + // Resource reuse info + static const unsigned io_type = io_parallel; + static const unsigned reuse_factor = 1; +}; + +template +void embedding( + data_T data[CONFIG_T::n_in], + res_T res[CONFIG_T::n_in * CONFIG_T::n_out], + typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) +{ + + #pragma HLS PIPELINE II=CONFIG_T::reuse_factor + // This can save a few cycles, but it will create a large multiplexer due to + // non-constant access pattern, so let's leave it out + //#pragma HLS ARRAY_PARTITION variable=embeddings complete + + InputSequence: for (int j = 0; j < 
CONFIG_T::n_in; j++) { + #pragma HLS UNROLL + DenseEmbedding: for (int i = 0; i < CONFIG_T::n_out; i++) { + #pragma HLS UNROLL + res[j * CONFIG_T::n_out + i] = embeddings[data[j] * CONFIG_T::n_out + i]; + } + } +} + +} + +#endif \ No newline at end of file diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_embed_stream.h b/hls4ml/templates/vivado/nnet_utils/nnet_embed_stream.h new file mode 100644 index 0000000000..3ada00b244 --- /dev/null +++ b/hls4ml/templates/vivado/nnet_utils/nnet_embed_stream.h @@ -0,0 +1,34 @@ +#ifndef NNET_EMBED_STREAM_H_ +#define NNET_EMBED_STREAM_H_ + +#include "nnet_common.h" +#include "nnet_helpers.h" +#include "hls_stream.h" + +namespace nnet { + +template +void embedding( + hls::stream &data, + hls::stream &res, + typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) +{ + data_T in_data = data.read(); + + InputSequence: for (int j = 0; j < data_T::size; j++) { + #pragma HLS PIPELINE II=CONFIG_T::reuse_factor + + res_T res_pack; + #pragma HLS DATA_PACK variable=res_pack + + DenseEmbedding: for (int i = 0; i < CONFIG_T::n_out; i++) { + #pragma HLS UNROLL + res_pack[i] = embeddings[in_data[j] * CONFIG_T::n_out + i]; + } + res.write(res_pack); + } +} + +} + +#endif \ No newline at end of file diff --git a/hls4ml/utils/config.py b/hls4ml/utils/config.py index 003282e54c..4947220de3 100644 --- a/hls4ml/utils/config.py +++ b/hls4ml/utils/config.py @@ -100,7 +100,7 @@ def config_from_keras_model(model, granularity='model', default_precision='ap_fi model_arch = json.loads(model.to_json()) #Define supported layers - core_layers = ['InputLayer', 'Dropout', 'Flatten', 'Reshape', 'Permute'] + core_layers = ['InputLayer', 'Dropout', 'Flatten', 'Reshape', 'Permute', 'Embedding'] dense_layers = ['Dense', 'BinaryDense', 'TernaryDense'] conv_layers = ['Conv1D', 'Conv2D', 'BinaryConv2D'] pooling_layers = ['MaxPooling1D', 'MaxPooling2D', 'GlobalMaxPooling1D', 'GlobalMaxPooling2D', 'AveragePooling1D', 'AveragePooling2D', 'GlobalAveragePooling1D', 'GlobalAveragePooling2D'] @@ -111,10 +111,11 @@ def config_from_keras_model(model, granularity='model', default_precision='ap_fi upsampling_layers = ['UpSampling1D', 'UpSampling2D'] reshaping_layers = ['ZeroPadding1D', 'ZeroPadding2D'] graph_layers = ['GarNet', 'GarNetStack'] + rnn_layers = ['SimpleRNN', 'LSTM', 'GRU'] #Define layers to skip because they're not configurable or not converted to HLS skip_layers = ['Dropout', 'Flatten', 'Reshape', 'Permute'] #All supported layers - supported_layers = core_layers + dense_layers + conv_layers + pooling_layers + norm_layers + activation_layers + merge_layers + qkeras_layers + upsampling_layers + reshaping_layers + graph_layers + skip_layers + supported_layers = core_layers + dense_layers + conv_layers + pooling_layers + norm_layers + activation_layers + merge_layers + qkeras_layers + upsampling_layers + reshaping_layers + graph_layers + rnn_layers + skip_layers keras_layer_config = None if model_arch['class_name'] == 'Sequential': @@ -186,12 +187,14 @@ def config_from_keras_model(model, granularity='model', default_precision='ap_fi def make_layer_config(layer): layer_config = {} - if layer['class_name'] in dense_layers + conv_layers: + if layer['class_name'] in dense_layers + conv_layers + rnn_layers: layer_config['Precision'] = {} layer_config['Precision']['weight'] = default_precision layer_config['Precision']['bias'] = default_precision layer_config['Precision']['result'] = default_precision layer_config['ReuseFactor'] = default_reuse_factor + if 
layer['class_name'] in rnn_layers:
+                layer_config['Precision']['recurrent_weight'] = default_precision
 
         elif layer['class_name'] in activation_layers:
             layer_config['Precision'] = default_precision
diff --git a/test/pytest/test_embed.py b/test/pytest/test_embed.py
new file mode 100644
index 0000000000..933082cd58
--- /dev/null
+++ b/test/pytest/test_embed.py
@@ -0,0 +1,48 @@
+import pytest
+import hls4ml
+import numpy as np
+from pathlib import Path
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, Embedding
+
+test_root_path = Path(__file__).parent
+
+@pytest.fixture(scope='module')
+def data():
+    X = np.random.randint(10, size=(32, 100))
+    return X
+
+@pytest.fixture(scope='module')
+def keras_model():
+    inputs = Input(shape=(100,), name='embedding_input')
+    embedding = Embedding(13, 8, input_length=100, name='embedding')(inputs)
+    model = Model(inputs=inputs, outputs=embedding)
+    return model
+
+@pytest.fixture
+@pytest.mark.parametrize('io_type', ['io_parallel',
+                                     'io_stream'])
+def hls_model(keras_model, io_type):
+    hls_config = hls4ml.utils.config_from_keras_model(keras_model,
+                                                      default_precision='ap_fixed<16,6>',
+                                                      granularity='name')
+    hls_config['LayerName']['embedding_input']['Precision']['result'] = 'ap_uint<4>'
+    out_dir = str(test_root_path / 'hls4mlprj_embed_{}').format(io_type)
+    hls_model = hls4ml.converters.convert_from_keras_model(keras_model,
+                                                           hls_config=hls_config,
+                                                           io_type=io_type,
+                                                           output_dir=out_dir)
+
+    hls_model.compile()
+    return hls_model
+
+@pytest.mark.parametrize('io_type', ['io_parallel',
+                                     'io_stream'])
+def test_embedding_accuracy(data, keras_model, hls_model):
+    X = data
+    model = keras_model
+    # model under test predictions and accuracy
+    y_keras = model.predict(X)
+    y_hls4ml = hls_model.predict(X.astype(float)).reshape(y_keras.shape)
+    # "accuracy" of hls4ml predictions vs keras
+    np.testing.assert_allclose(y_keras, y_hls4ml, rtol=0, atol=1e-03, verbose=True)
diff --git a/test/pytest/test_rnn.py b/test/pytest/test_rnn.py
new file mode 100644
index 0000000000..99695fe22d
--- /dev/null
+++ b/test/pytest/test_rnn.py
@@ -0,0 +1,58 @@
+import pytest
+import hls4ml
+import tensorflow as tf
+import numpy as np
+from pathlib import Path
+from tensorflow.keras import optimizers
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, Embedding, SimpleRNN, LSTM, GRU
+import math
+from tensorflow.keras import backend as K
+
+test_root_path = Path(__file__).parent
+
+rnn_layers = [SimpleRNN, LSTM, GRU]
+@pytest.mark.parametrize('rnn_layer', rnn_layers)
+@pytest.mark.parametrize('return_sequences', [True, False])
+def test_rnn_parsing(rnn_layer, return_sequences):
+    time_steps = 3
+    input_size = 8
+    input_shape = (time_steps, input_size)
+
+    model_input = Input(shape=input_shape)
+    model_output = rnn_layer(64, return_sequences=return_sequences)(model_input)
+
+    model = Model(model_input, model_output)
+    model.compile(optimizer='adam', loss='mse')
+
+    config = hls4ml.utils.config_from_keras_model(model, granularity='name')
+    prj_name = 'hls4mlprj_rnn_{}_seq_{}'.format(
+        rnn_layer.__name__.lower(),
+        int(return_sequences)
+    )
+    output_dir = str(test_root_path / prj_name)
+    hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir)
+
+    hls_layer = list(hls_model.get_layers())[1] # 0 is input, 1 is the RNN layer
+    keras_layer = model.layers[1]
+
+    # Basic sanity check, I/O, activations
+    assert hls_layer.class_name == rnn_layer.__name__
+    assert
hls_layer.attributes['n_out'] == keras_layer.units + assert hls_layer.attributes['activation'] == keras_layer.activation.__name__ + if 'recurrent_activation' in hls_layer.attributes: # SimpleRNN doesn't have this + assert hls_layer.attributes['recurrent_activation'] == keras_layer.recurrent_activation.__name__ + assert hls_layer.get_input_variable().shape == list(input_shape) + assert hls_layer.get_output_variable().shape == model_output.shape.as_list()[1:] # Ignore the batch size + + # Compare weights + hls_weights = list(hls_layer.get_weights()) # [weights, bias, recurrent_weights, "recurrent_bias" hack] + rnn_weights = keras_layer.get_weights() # [weights, recurrent_weights, bias] + + assert hls_weights[0].data.shape == rnn_weights[0].shape + assert hls_weights[2].data.shape == rnn_weights[1].shape + assert hls_weights[1].data.shape == rnn_weights[2].shape + + np.testing.assert_array_equal(hls_weights[0].data, rnn_weights[0]) + np.testing.assert_array_equal(hls_weights[2].data, rnn_weights[1]) + np.testing.assert_array_equal(hls_weights[1].data, rnn_weights[2])
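
Note for reviewers, not part of the patch: a minimal standalone Python sketch of the multiplier geometry that the new get_layer_mult_size() reports for recurrent layers. The mult_size helper and the example sizes below are invented for illustration; the 4x/3x gate factors are taken from the hunk in fpga_backend.py.

    def mult_size(class_name, n_in, units):
        # Mirrors the mapping added to FPGABackend.get_layer_mult_size: LSTM/GRU stack
        # 4/3 gates on the output side of the input kernel, and the recurrent kernel
        # multiplies the state (units wide) into the same stacked gates.
        if class_name == 'Dense':
            return n_in, units
        if class_name == 'LSTM':
            n_out = units * 4
            return n_in, n_out, units, n_out  # (n_in, n_out, n_in_recr, n_out_recr)
        if class_name == 'GRU':
            n_out = units * 3
            return n_in, n_out, units, n_out
        raise ValueError(f'Cannot get mult size for {class_name}')

    print(mult_size('LSTM', n_in=8, units=64))  # (8, 256, 64, 256)
    print(mult_size('GRU', n_in=8, units=64))   # (8, 192, 64, 192)

The first pair constrains the usual reuse_factor, while the second pair, passed with attribute='recurrent_reuse_factor', lets init_lstm/init_gru settle the recurrent multiplication on its own valid reuse factor.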
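
A NumPy reference (illustration only; helper name and sizes are made up) of the lookup both new HLS kernels perform, res[j * n_out + i] = embeddings[data[j] * n_out + i], assuming the embeddings table is stored flat with one row of n_out values per vocabulary entry as in nnet_embed.h:

    import numpy as np

    def embedding_ref(indices, embeddings, n_out):
        table = embeddings.reshape(-1, n_out)          # (vocab_size, n_out)
        return table[indices.astype(int)].reshape(-1)  # (n_in * n_out,), like the flat HLS output

    vocab_size, n_out, n_in = 13, 8, 100
    table = np.random.rand(vocab_size * n_out).astype(np.float32)
    indices = np.random.randint(vocab_size, size=n_in)
    assert embedding_ref(indices, table, n_out).shape == (n_in * n_out,)

Because data[j] is used directly as an index, the layer input needs an integer-like precision; test_embed.py sets the input result type to ap_uint<4> for exactly this reason.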
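
A short standalone check (illustrative; nothing below is added by the patch) of the Keras weight layout that the new recurrent parser, the LSTM/GRU layer classes, and test_rnn.py rely on, plus the zero recurrent_bias that init_lstm/init_gru register:

    import numpy as np
    from tensorflow.keras.layers import Input, LSTM
    from tensorflow.keras.models import Model

    inp = Input(shape=(3, 8))              # 3 time steps, 8 features per step
    model = Model(inp, LSTM(64)(inp))

    kernel, recurrent_kernel, bias = model.layers[1].get_weights()
    print(kernel.shape)            # (8, 256)  -> stored as 'weight'
    print(recurrent_kernel.shape)  # (64, 256) -> stored as 'recurrent_weight'
    print(bias.shape)              # (256,)    -> stored as 'bias'

    # init_lstm()/init_gru() register an all-zero 'recurrent_bias' of matching width
    # so the HLS recurrent implementations always receive a recurrent bias argument:
    recurrent_bias = np.zeros(recurrent_kernel.shape[1])  # (256,)

hls4ml keeps the variables in the order [weight, bias, recurrent_weight, recurrent_bias], whereas Keras returns [kernel, recurrent_kernel, bias], which is why test_rnn.py compares hls_weights[0]/[2]/[1] against rnn_weights[0]/[1]/[2].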