Transpose2d, Concatenate2d, and up to 3 Clones for io_stream #402

Merged · 5 commits · Jun 17, 2022
22 changes: 13 additions & 9 deletions hls4ml/backends/vivado/passes/clone.py
@@ -11,21 +11,25 @@ class Clone(Layer):

     def initialize(self):
         inp = self.get_input_variable()
-        self.add_output_variable(inp.shape, inp.dim_names, out_name=self.outputs[0], var_name='layer{index}_cpy1')
-        self.add_output_variable(inp.shape, inp.dim_names, out_name=self.outputs[1], var_name='layer{index}_cpy2')
+        for i, out_name in enumerate(self.outputs):
+            self.add_output_variable(inp.shape, inp.dim_names, out_name=out_name, var_name='layer{index}_cpy' + str(i + 1))

-clone_function_template = 'nnet::clone_stream<{input_t}, {output_t}, {size}>({input}, {output1}, {output2});'
 clone_include_list = ['nnet_utils/nnet_stream.h']

 class CloneFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(Clone, include_header=clone_include_list)
-        self.template = clone_function_template
+        self.template = None  # to be filled in once the number of clones is known

     def format(self, node):
         params = self._default_function_params(node)
-        params['output1'] = node.variables[node.outputs[0]].name
-        params['output2'] = node.variables[node.outputs[1]].name
+        for i, output in enumerate(node.outputs):
+            params['output' + str(i + 1)] = node.variables[node.outputs[i]].name
+
+        if self.template is None:
+            self.template = 'nnet::clone_stream<{input_t}, {output_t}, {size}>({input}, ' + \
+                ', '.join(['{output' + str(i + 1) + '}' for i in range(len(node.outputs))]) + \
+                ');'

         return self.template.format(**params)

@@ -63,8 +67,8 @@ def transform(self, model, node):
         transformed = False
         for output in node.outputs:
             if len(output_map[output]) > 1:
-                if len(output_map[output]) > 2:
-                    print('WARN: Cannot clone output {} of {} ({})'.format(output, node.class_name, node.name))
+                if len(output_map[output]) > 3:
+                    print('WARNING: Cloning output {} of {} ({}) more than 3 times is not currently supported'.format(output, node.__class__.__name__, node.name))
                     return False
                 out_var = node.get_output_variable(output)
                 for i, layer in enumerate(output_map[output], 1):
@@ -73,7 +77,7 @@
                     }
                     idx = layer.inputs.index(output)
                     layer.inputs[idx] = output + '_cpy' + str(i)
-                    clone_layer = model.make_node(Clone, 'clone_' + node.name, attrs, [output], [output + '_cpy1', output + '_cpy2'])
+                    clone_layer = model.make_node(Clone, 'clone_' + node.name, attrs, [output], [output + '_cpy' + str(i + 1) for i in range(len(output_map[output]))])
                     model.insert_node(clone_layer)
                     transformed = True

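For reference, a minimal standalone sketch (not part of the diff) of the call string the updated CloneFunctionTemplate now builds, here for a three-way clone; every name below is illustrative rather than taken from a real model:

# Rebuild the template string the same way format() does for a node with three outputs.
outputs = ['layer2_cpy1', 'layer2_cpy2', 'layer2_cpy3']
template = 'nnet::clone_stream<{input_t}, {output_t}, {size}>({input}, ' + \
    ', '.join(['{output' + str(i + 1) + '}' for i in range(len(outputs))]) + \
    ');'
params = {'input_t': 'layer1_t', 'output_t': 'layer2_t', 'size': 'N_LAYER_1', 'input': 'layer1_out'}
params.update({'output' + str(i + 1): name for i, name in enumerate(outputs)})
print(template.format(**params))
# nnet::clone_stream<layer1_t, layer2_t, N_LAYER_1>(layer1_out, layer2_cpy1, layer2_cpy2, layer2_cpy3);

The three-argument call resolves to the new three-output clone_stream overload added in nnet_stream.h below.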
6 changes: 3 additions & 3 deletions hls4ml/backends/vivado/passes/reshaping_templates.py
@@ -101,17 +101,17 @@ def format(self, node):
     static constexpr unsigned perm[3] = {{{perm_str}}};
 }};\n"""

-transpose_function_template = 'nnet::transpose_{dim}<{input_t}, {config}>({input}, {output});'
+transpose_function_template = 'nnet::transpose_{dim}<{input_t}, {output_t}, {config}>({input}, {output});'

-transpose_include_list = ['nnet_utils/nnet_array.h']
+transpose_include_list = ['nnet_utils/nnet_array.h', 'nnet_utils/nnet_stream.h']

 class TransposeConfigTemplate(LayerConfigTemplate):
     def __init__(self):
         super().__init__(Transpose)
         self.template = transpose_config_template

     def format(self, node):
-        params = self._default_config_params()
+        params = self._default_config_params(node)

         return self.template.format(**params)
4 changes: 2 additions & 2 deletions hls4ml/model/optimizer/__init__.py
@@ -14,10 +14,10 @@
 try:
     import qkeras
     register_flow('convert', ['fuse_bias_add', 'remove_useless_transpose', 'output_rounding_saturation_mode', 'qkeras_factorize_alpha', 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization']) # TODO Maybe not all QKeras optmizers belong here?
-    register_flow('optimize', ['eliminate_linear_activation', 'fuse_consecutive_batch_normalization', 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv'], requires=['convert'])
+    register_flow('optimize', ['eliminate_linear_activation', 'fuse_consecutive_batch_normalization', 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv', 'set_precision_concat'], requires=['convert'])
 except:
     register_flow('convert', ['fuse_bias_add', 'remove_useless_transpose'])
-    register_flow('optimize', ['eliminate_linear_activation', 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv'], requires=['convert'])
+    register_flow('optimize', ['eliminate_linear_activation', 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv', 'set_precision_concat'], requires=['convert'])

 del opt_path
 del module_path
39 changes: 39 additions & 0 deletions hls4ml/model/optimizer/passes/precision_merge.py
@@ -0,0 +1,39 @@
from hls4ml.model.optimizer import OptimizerPass
from hls4ml.model.types import FixedPrecisionType

def get_concat_type(itype1, itype2):
    newwidth = max(itype1.width, itype2.width)
    newint = max(itype1.integer, itype2.integer)
    if (itype1.signed ^ itype2.signed):  # XOR
        newint += 1
        newwidth += 1
    newrmode = itype1.rounding_mode if itype1.rounding_mode is not None else itype2.rounding_mode
    newsmode = itype1.saturation_mode if itype1.saturation_mode is not None else itype2.saturation_mode
    newsbits = itype1.saturation_bits if itype1.saturation_bits is not None else itype2.saturation_bits

    newtype = FixedPrecisionType(newwidth, newint, itype1.signed or itype2.signed,
                                 newrmode, newsmode, newsbits)
    return newtype

class SetPrecisionConcat(OptimizerPass):
    def match(self, node):
        if node.__class__.__name__ == 'Concatenate':
            otype = node.get_output_variable().type.precision
            itype1 = node.get_input_variable(node.inputs[0]).type.precision
            itype2 = node.get_input_variable(node.inputs[1]).type.precision
            if isinstance(otype, FixedPrecisionType) and otype != get_concat_type(itype1, itype2):
                return True
        return False

    def transform(self, model, node):
        """
        Set concat output precision
        """
        otype = node.get_output_variable().type.precision
        itype1 = node.get_input_variable(node.inputs[0]).type.precision
        itype2 = node.get_input_variable(node.inputs[1]).type.precision
        newtype = get_concat_type(itype1, itype2)
        print("Found {} in the model, optimizing {} to {}...".format(node.name, otype, newtype))
        node.get_output_variable().type.precision = newtype

        return True
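As a sanity check of the merge rule, a self-contained sketch using plain tuples in place of FixedPrecisionType (values illustrative):

# Standalone sketch of get_concat_type's width/sign arithmetic; no hls4ml import needed.
from collections import namedtuple

FP = namedtuple('FP', 'width integer signed')

def concat_precision(a, b):
    width, integer = max(a.width, b.width), max(a.integer, b.integer)
    if a.signed ^ b.signed:  # exactly one side signed: add a bit so both ranges fit
        width, integer = width + 1, integer + 1
    return FP(width, integer, a.signed or b.signed)

# ap_fixed<16,6> merged with ap_ufixed<14,4> -> ap_fixed<17,7>
print(concat_precision(FP(16, 6, True), FP(14, 4, False)))  # FP(width=17, integer=7, signed=True)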
8 changes: 4 additions & 4 deletions hls4ml/templates/vivado/nnet_utils/nnet_array.h
@@ -12,10 +12,10 @@ struct transpose_config {
     static constexpr unsigned perm[3] = {2, 0, 1};
 };

-template<class data_T, typename CONFIG_T>
+template<class data_T, class res_T, typename CONFIG_T>
 void transpose_2d(
     data_T data[CONFIG_T::height * CONFIG_T::width],
-    data_T data_t[CONFIG_T::height * CONFIG_T::width]
+    res_T data_t[CONFIG_T::height * CONFIG_T::width]
 ) {
     #pragma HLS PIPELINE

@@ -26,10 +26,10 @@ void transpose_2d(
     }
 }

-template<class data_T, typename CONFIG_T>
+template<class data_T, class res_T, typename CONFIG_T>
 void transpose_3d(
     data_T data[CONFIG_T::depth * CONFIG_T::height * CONFIG_T::width],
-    data_T data_t[CONFIG_T::depth * CONFIG_T::height * CONFIG_T::width]
+    res_T data_t[CONFIG_T::depth * CONFIG_T::height * CONFIG_T::width]
 ) {
     unsigned dims[3] = { CONFIG_T::depth, CONFIG_T::height, CONFIG_T::width };
     unsigned dims_t[3];
104 changes: 104 additions & 0 deletions hls4ml/templates/vivado/nnet_utils/nnet_merge_stream.h
@@ -299,6 +299,110 @@ void concatenate3d(
    }
}

template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
void concatenate2d_0(
    hls::stream<input1_T> &data1,
    hls::stream<input2_T> &data2,
    hls::stream<res_T> &res)
{
    ConcatLoopHeight1: for (int i = 0; i < CONFIG_T::n_elem1_0; i++) {
        #pragma HLS PIPELINE II=1

        input1_T in_data1 = data1.read();
        res_T out_data;
        #pragma HLS DATA_PACK variable=out_data

        ConcatPackInput1: for (int k = 0; k < input1_T::size; k++) {
            #pragma HLS UNROLL
            out_data[k] = in_data1[k];
        }

        res.write(out_data);
    }
    ConcatLoopHeight2: for (int i = 0; i < CONFIG_T::n_elem2_0; i++) {
        #pragma HLS PIPELINE II=1

        input2_T in_data2 = data2.read();
        res_T out_data;
        #pragma HLS DATA_PACK variable=out_data

        ConcatPackInput2: for (int k = 0; k < input2_T::size; k++) {
            #pragma HLS UNROLL
            out_data[k] = in_data2[k];
        }

        res.write(out_data);
    }
}

template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
void concatenate2d_1(
    hls::stream<input1_T> &data1,
    hls::stream<input2_T> &data2,
    hls::stream<res_T> &res)
{
    ConcatLoopHeight: for (int i = 0; i < CONFIG_T::n_elem1_0; i++) {
        #pragma HLS PIPELINE II=1

        input1_T in_data1 = data1.read();
        input2_T in_data2 = data2.read();
        res_T out_data;
        #pragma HLS DATA_PACK variable=out_data

        ConcatPackInput1: for (int k = 0; k < input1_T::size; k++) {
            #pragma HLS UNROLL
            out_data[k] = in_data1[k];
        }

        ConcatPackInput2: for (int k = 0; k < input2_T::size; k++) {
            #pragma HLS UNROLL
            out_data[input1_T::size + k] = in_data2[k];
        }

        res.write(out_data);
    }
}

template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
void concatenate2d(
    hls::stream<input1_T> &data1,
    hls::stream<input2_T> &data2,
    hls::stream<res_T> &res)
{
    if (CONFIG_T::axis == 2 || CONFIG_T::axis == -1) {
        concatenate2d_1<input1_T, input2_T, res_T, CONFIG_T>(data1, data2, res);
    } else {
        concatenate2d_0<input1_T, input2_T, res_T, CONFIG_T>(data1, data2, res);
    }
}
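Behaviorally (this reference is not part of the PR), the two variants correspond to NumPy concatenation along the first and last axes, assuming each stream packet carries one row:

import numpy as np

a = np.arange(6).reshape(2, 3)       # data1: one row per packet
b = np.arange(10, 16).reshape(2, 3)  # data2: one row per packet

# concatenate2d_0: forward all of data1's packets, then all of data2's
out_axis0 = np.concatenate([a, b], axis=0)  # shape (4, 3)

# concatenate2d_1: read one packet from each stream per iteration and append
# data2's elements after data1's in the same output packet
out_axis1 = np.concatenate([a, b], axis=1)  # shape (2, 6)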

template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
void concatenate1d(
    hls::stream<input1_T> &data1,
    hls::stream<input2_T> &data2,
    hls::stream<res_T> &res)
{
    res_T out_data;
    #pragma HLS DATA_PACK variable=out_data
    ConcatLoop1: for (int i = 0; i < CONFIG_T::n_elem1_0 / input1_T::size; i++) {
        #pragma HLS PIPELINE
        input1_T in_data1 = data1.read();
        ConcatPack1: for (int j = 0; j < res_T::size; j++) {
            #pragma HLS UNROLL
            out_data[j] = in_data1[j];
        }
        res.write(out_data);
    }
    ConcatLoop2: for (int i = 0; i < CONFIG_T::n_elem2_0 / input2_T::size; i++) {
        #pragma HLS PIPELINE
        input2_T in_data2 = data2.read();
        ConcatPack2: for (int j = 0; j < res_T::size; j++) {
            #pragma HLS UNROLL
            out_data[j] = in_data2[j];
        }
        res.write(out_data);
    }
}
}

#endif
50 changes: 50 additions & 0 deletions hls4ml/templates/vivado/nnet_utils/nnet_stream.h
@@ -38,6 +38,32 @@ void clone_stream(hls::stream<data_T> &data, hls::stream<res_T> &res1, hls::stream<res_T> &res2) {
    }
}

template<class data_T, class res_T, int N>
void clone_stream(hls::stream<data_T> &data, hls::stream<res_T> &res1, hls::stream<res_T> &res2, hls::stream<res_T> &res3) {
    CloneLoop: for (int i = 0; i < N / data_T::size; i++) {
        #pragma HLS PIPELINE

        data_T in_data = data.read();
        res_T out_data1;
        res_T out_data2;
        res_T out_data3;
        #pragma HLS DATA_PACK variable=out_data1
        #pragma HLS DATA_PACK variable=out_data2
        #pragma HLS DATA_PACK variable=out_data3

        ClonePack: for (int j = 0; j < data_T::size; j++) {
            #pragma HLS UNROLL
            out_data1[j] = in_data[j];
            out_data2[j] = in_data[j];
            out_data3[j] = in_data[j];
        }

        res1.write(out_data1);
        res2.write(out_data2);
        res3.write(out_data3);
    }
}

template<class data_T, class res_T, int N>
void repack_stream(hls::stream<data_T> &data, hls::stream<res_T> &res) {
    if (data_T::size == res_T::size) {

@@ -145,6 +171,30 @@ void broadcast_stream(hls::stream<data_T> &data, hls::stream<res_T> &res) {
        broadcast_stream_HxWx1<data_T, res_T, CONFIG_T>(data, res);
    }
}

template<class data_T, class res_T, typename CONFIG_T>
void transpose_2d(hls::stream<data_T> &data, hls::stream<res_T> &res) {
    typename data_T::value_type data_array[CONFIG_T::height * CONFIG_T::width];
    #pragma HLS ARRAY_PARTITION variable=data_array complete

    for (int i = 0; i < CONFIG_T::height * CONFIG_T::width / data_T::size; i++) {
        #pragma HLS PIPELINE
        data_T in_data = data.read();
        for (int j = 0; j < data_T::size; j++) {
            data_array[i * data_T::size + j] = typename data_T::value_type(in_data[j]);
        }
    }

    for (int i = 0; i < CONFIG_T::height * CONFIG_T::width / res_T::size; i++) {
        #pragma HLS PIPELINE
        res_T out_data;
        #pragma HLS DATA_PACK variable=out_data
        for (int j = 0; j < res_T::size; j++) {
            out_data[j] = typename res_T::value_type(data_array[j * data_T::size + i]);
        }
        res.write(out_data);
    }
}
}

#endif
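A NumPy sketch (again not part of the PR) of what the streaming transpose_2d computes, assuming one input packet per row (data_T::size == width) and one output packet per column (res_T::size == height):

import numpy as np

H, W = 2, 3
x = np.arange(H * W).reshape(H, W)

# First loop: incoming packets are flattened row-major into data_array.
buf = x.reshape(-1)

# Second loop: output packet i holds out_data[j] = buf[j * W + i],
# i.e. column i of the input, so the packets together form the transpose.
out = np.stack([buf[np.arange(H) * W + i] for i in range(W)])
assert (out == x.T).all()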
48 changes: 48 additions & 0 deletions test/pytest/test_transpose_concat.py
@@ -0,0 +1,48 @@
import pytest
import hls4ml
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Permute, Concatenate, Activation

@pytest.fixture(scope='module')
def data():
    X = np.random.rand(100, 2, 3)
    return X

@pytest.fixture(scope='module')
def keras_model():
    inp = Input(shape=(2, 3), name='input_1')
    x = Permute((2, 1))(inp)
    y = Concatenate(axis=1)([x, x])
    x = Activation('relu', name='relu')(x)
    out = Concatenate(axis=1)([x, y])
    model = Model(inputs=inp, outputs=out)
    return model

# Parametrize through the fixture: pytest.mark.parametrize has no effect on
# fixtures, so the io_type variants are supplied via request.param instead.
@pytest.fixture(params=['io_parallel', 'io_stream'])
def hls_model(keras_model, request):
    io_type = request.param
    hls_config = hls4ml.utils.config_from_keras_model(keras_model,
                                                      default_precision='ap_fixed<16,3,AP_RND_CONV,AP_SAT>',
                                                      granularity='name')
    hls_config['LayerName']['relu']['Precision'] = 'ap_ufixed<17,3>'
    hls_model = hls4ml.converters.convert_from_keras_model(keras_model,
                                                           hls_config=hls_config,
                                                           io_type=io_type,
                                                           output_dir='hls4mlprj_transpose_{}'.format(io_type))

    hls_model.compile()
    return hls_model

def test_accuracy(data, keras_model, hls_model):
    X = data
    model = keras_model
    # model under test predictions and accuracy
    y_keras = model.predict(X)
    y_hls4ml = hls_model.predict(X).reshape(y_keras.shape)
    # "accuracy" of hls4ml predictions vs keras
    np.testing.assert_allclose(y_keras, y_hls4ml, rtol=0, atol=1e-04, verbose=True)
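For orientation, a quick NumPy trace (illustrative, batch size 1) of the shapes flowing through the test model:

import numpy as np

x = np.random.rand(1, 2, 3)
p = np.transpose(x, (0, 2, 1))        # Permute((2, 1)): (1, 3, 2)
y = np.concatenate([p, p], axis=1)    # first Concatenate: (1, 6, 2)
r = np.maximum(p, 0)                  # relu: (1, 3, 2)
out = np.concatenate([r, y], axis=1)  # final Concatenate: (1, 9, 2)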