Skip to content

Commit e6d24f0

Browse files
committed
clone streams up to 3 times
1 parent e5f755c commit e6d24f0

File tree

2 files changed

+58
-4
lines changed

2 files changed

+58
-4
lines changed

hls4ml/model/optimizer/passes/clone.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,44 @@ def config_cpp(self):
2626
clone_function_template = 'nnet::clone_stream<{input_t}, {output_t}, {size}>({input}, {output1}, {output2});'
2727
clone_include_list = ['nnet_utils/nnet_stream.h']
2828

29+
class CloneThree(Layer):
    ''' Three-way clone layer: exposes three output variables that mirror its single input. '''

    def initialize(self):
        # Every copy reuses the shape and dimension names of the input variable;
        # only the output name and the generated variable name differ per copy.
        source = self.get_input_variable()
        copy_var_names = ('layer{index}_cpy1', 'layer{index}_cpy2', 'layer{index}_cpy3')
        for slot, var_name in enumerate(copy_var_names):
            self.add_output_variable(source.shape, source.dim_names, out_name=self.outputs[slot], var_name=var_name)

    def function_cpp(self):
        # Start from the default function parameters, then add the stream size
        # and the names of the three output variables used by the template.
        params = self._default_function_params()
        params['size'] = self.get_attr('size')
        for key, slot in (('output1', 0), ('output2', 1), ('output3', 2)):
            params[key] = self.variables[self.outputs[slot]].name
        return [self._function_template.format(**params)]

    def config_cpp(self):
        # This layer emits no config code.
        return None
48+
49+
clonethree_function_template = 'nnet::clone_stream<{input_t}, {output_t}, {size}>({input}, {output1}, {output2}, {output3});'
50+
clonethree_include_list = ['nnet_utils/nnet_stream.h']
51+
2952
# Register the layer types to the layer map
3053
register_layer('Clone', Clone)
54+
register_layer('CloneThree', CloneThree)
3155

3256
# Register the templates for config and function
3357
for backend in ['Vivado', 'VivadoAccelerator']:
3458
templates.get_backend(backend).register_templates('Clone', clone_function_template, None, clone_include_list)
59+
templates.get_backend(backend).register_templates('CloneThree', clonethree_function_template, None, clonethree_include_list)
3560

3661

3762
class CloneOutput(OptimizerPass):
3863
''' Clones streams that are used multiple times '''
3964
def match(self, node):
4065
# We may have already inserted the Clone layer
41-
if node.__class__.__name__ == 'Clone':
66+
if node.__class__.__name__ in ['Clone', 'CloneThree']:
4267
return False
4368

4469
return True
@@ -59,8 +84,8 @@ def transform(self, model, node):
5984
transformed = False
6085
for output in node.outputs:
6186
if len(output_map[output]) > 1:
62-
if len(output_map[output]) > 2:
63-
print('WARN: Cannot clone output {} of {} ({})'.format(output, node.__class__.__name__, node.name))
87+
if len(output_map[output]) > 3:
88+
print('WARNING: Cloning output {} of {} ({}) more than 3 times not currently supported'.format(output, node.__class__.__name__, node.name))
6489
return False
6590
out_var = node.get_output_variable(output)
6691
for i, layer in enumerate(output_map[output], 1):
@@ -69,7 +94,10 @@ def transform(self, model, node):
6994
}
7095
idx = layer.inputs.index(output)
7196
layer.inputs[idx] = output + '_cpy' + str(i)
72-
clone_layer = model.make_node('Clone', 'clone_' + node.name, attrs, [output], [output + '_cpy1', output + '_cpy2'])
97+
if len(output_map[output]) == 3:
98+
clone_layer = model.make_node('CloneThree', 'clone_' + node.name, attrs, [output], [output + '_cpy1', output + '_cpy2', output + '_cpy3'])
99+
else:
100+
clone_layer = model.make_node('Clone', 'clone_' + node.name, attrs, [output], [output + '_cpy1', output + '_cpy2'])
73101
model.insert_node(clone_layer)
74102
transformed = True
75103

hls4ml/templates/vivado/nnet_utils/nnet_stream.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,32 @@ void clone_stream(hls::stream<data_T> &data, hls::stream<res_T> &res1, hls::stre
3737
}
3838
}
3939

40+
template<class data_T, class res_T, int N>
41+
void clone_stream(hls::stream<data_T> &data, hls::stream<res_T> &res1, hls::stream<res_T> &res2, hls::stream<res_T> &res3) {
42+
CloneLoop: for (int i = 0; i < N / data_T::size; i++) {
43+
#pragma HLS PIPELINE
44+
45+
data_T in_data = data.read();
46+
res_T out_data1;
47+
res_T out_data2;
48+
res_T out_data3;
49+
#pragma HLS DATA_PACK variable=out_data1
50+
#pragma HLS DATA_PACK variable=out_data2
51+
#pragma HLS DATA_PACK variable=out_data3
52+
53+
ClonePack: for (int j = 0; j < data_T::size; j++) {
54+
#pragma HLS UNROLL
55+
out_data1[j] = in_data[j];
56+
out_data2[j] = in_data[j];
57+
out_data3[j] = in_data[j];
58+
}
59+
60+
res1.write(out_data1);
61+
res2.write(out_data2);
62+
res3.write(out_data3);
63+
}
64+
}
65+
4066
template<class data_T, class res_T, int N>
4167
void repack_stream(hls::stream<data_T> &data, hls::stream<res_T> &res) {
4268
if (data_T::size == res_T::size) {

0 commit comments

Comments
 (0)