fastmachinelearning · vloncar · May 31, 2022 · Dec 10, 2021 · May 31, 2022 · May 31, 2022
diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_conv1d.h b/hls4ml/templates/vivado/nnet_utils/nnet_conv1d.h
@@ -65,6 +65,22 @@ void conv_1d_cl(
     }
 }
 
+// Entry point for a 1D convolution whose kernel width is 1 (pointwise).
+// Forwards to the latency or the resource implementation depending on
+// CONFIG_T::strategy.
+//   data    - input array of in_width * n_chan elements
+//   res     - output array of out_width * n_filt elements
+//   weights - n_chan * n_filt weights
+//   biases  - n_filt biases
+template<class data_T, class res_T, typename CONFIG_T>
+void pointwise_conv_1d_cl(
+    data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],
+    res_T  res[CONFIG_T::out_width * CONFIG_T::n_filt],
+    typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt],
+    typename CONFIG_T::bias_t   biases[CONFIG_T::n_filt])
+{
+    // Only width-1 kernels are handled by the pointwise implementations.
+    assert(CONFIG_T::filt_width == 1);
+
+    // Any strategy other than nnet::latency takes the resource path.
+    if (CONFIG_T::strategy == nnet::latency) {
+        pointwise_conv_1d_latency_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
+    } else {
+        pointwise_conv_1d_resource_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
+    }
+}
+
 }//end namespace
 
 #endif
diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_conv1d_latency.h b/hls4ml/templates/vivado/nnet_utils/nnet_conv1d_latency.h
@@ -119,7 +119,7 @@ void conv_1d_latency_cl(
 }
 
 template<class data_T, class res_T, typename CONFIG_T>
-void pointwise_conv_1d_cl(
+void pointwise_conv_1d_latency_cl(
     data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],
     res_T  res[CONFIG_T::out_width * CONFIG_T::n_filt],
     typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt],

diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_conv1d_resource.h b/hls4ml/templates/vivado/nnet_utils/nnet_conv1d_resource.h
@@ -160,6 +160,24 @@ void im2col_1d_cl(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan], data_T dat
     }
 }
 
+// Gathers the n_chan input values that feed output position `col` into data_col.
+// The source position is col * stride_width - pad_left; the flat index into `data`
+// is position * n_chan + channel. Entries whose flat index falls outside
+// [0, in_width * n_chan) are written as 0.
+template<class data_T, typename CONFIG_T>
+void im2col_1d_pointwise_cl(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan], data_T data_col[CONFIG_T::n_chan], const int col) {
+    // Write position in data_col; incremented once per iteration, so it always equals `channel`.
+    int index = 0;
+    ChannelLoop:
+    for (int channel = 0; channel < CONFIG_T::n_chan; channel++) {
+        #pragma HLS UNROLL
+
+        // Flat index of (source position, channel) in `data`.
+        int index_data = (col*CONFIG_T::stride_width-CONFIG_T::pad_left) * CONFIG_T::n_chan + channel;
+
+        // Out-of-range indices produce a zero entry instead of a read.
+        if (index_data >= 0 && index_data < CONFIG_T::in_width*CONFIG_T::n_chan) {
+            data_col[index] = data[index_data];
+        } else {
+            data_col[index] = 0;
+        }
+        index++;
+    }
+}
+
 template<class data_T, class res_T, typename CONFIG_T>
 void conv_1d_resource_cl(
     data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],
@@ -195,5 +213,43 @@ void conv_1d_resource_cl(
     }
 }
 
+
+// Resource-strategy implementation of the pointwise (kernel width 1) 1D convolution.
+// For every output position it gathers the input channels with
+// im2col_1d_pointwise_cl, runs dense_resource on them using
+// CONFIG_T::mult_config, and copies the n_filt results into `res`.
+//   data    - input array of in_width * n_chan elements
+//   res     - output array of out_width * n_filt elements
+//   weights - n_chan * n_filt weights
+//   biases  - n_filt biases
+template<class data_T, class res_T, typename CONFIG_T>
+void pointwise_conv_1d_resource_cl(
+    data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],
+    res_T  res[CONFIG_T::out_width * CONFIG_T::n_filt],
+    typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt],
+    typename CONFIG_T::bias_t   biases[CONFIG_T::n_filt]
+)
+{
+    assert(CONFIG_T::filt_width == 1);
+
+    // These constants are referenced only by the commented-out pragmas below.
+    const int nin = CONFIG_T::n_chan;
+    const int nout = CONFIG_T::n_filt;
+    const int rufactor = CONFIG_T::reuse_factor;
+    const int block_factor = DIV_ROUNDUP(nin*nout, rufactor);
+
+    //#pragma HLS function_instantiate variable=weights,biases
+    //#pragma HLS RESOURCE         variable=weights core=RAM_2P_BRAM Commenting out the designation HLS seems to choose correctly
+    //#pragma HLS ARRAY_RESHAPE   variable=weights block factor=block_factor
+    //#pragma HLS ARRAY_PARTITION variable=biases complete
+
+    // Per-position scratch buffers: gathered input channels and the dense output.
+    data_T data_col[CONFIG_T::n_chan];
+    res_T res_col[CONFIG_T::n_filt];
+
+    #pragma HLS ARRAY_PARTITION variable=data_col complete
+    #pragma HLS ARRAY_PARTITION variable=res_col complete
+
+    ColLoop:
+    for (int i = 0; i < CONFIG_T::out_width; i++) {
+        #pragma HLS PIPELINE
+        im2col_1d_pointwise_cl<data_T, CONFIG_T>(data, data_col, i);
+        dense_resource<data_T, res_T, typename CONFIG_T::mult_config>(data_col, res_col, weights, biases);
+        // Store the n_filt results for output position i contiguously in `res`.
+        for (int j = 0; j < CONFIG_T::n_filt; j++) {
+            res[i * CONFIG_T::n_filt + j] = res_col[j];
+        }
+    }
+}
+
 }
 #endif
diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_conv2d.h b/hls4ml/templates/vivado/nnet_utils/nnet_conv2d.h
@@ -86,6 +86,22 @@ void conv_2d_cl(
     }
 }
 
+// Entry point for a 2D convolution whose kernel is 1x1 (pointwise).
+// Forwards to the latency or the resource implementation depending on
+// CONFIG_T::strategy.
+//   data    - input array of in_height * in_width * n_chan elements
+//   res     - output array of out_height * out_width * n_filt elements
+//   weights - n_chan * n_filt weights
+//   biases  - n_filt biases
+template<class data_T, class res_T, typename CONFIG_T>
+void pointwise_conv_2d_cl(
+    data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan],
+    res_T  res[CONFIG_T::out_height * CONFIG_T::out_width * CONFIG_T::n_filt],
+    typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt],
+    typename CONFIG_T::bias_t   biases[CONFIG_T::n_filt])
+{
+    // A pointwise kernel must be 1 in both dimensions; check height as well as
+    // width so both strategy paths are guarded by the same precondition.
+    assert(CONFIG_T::filt_height == 1 && CONFIG_T::filt_width == 1);
+
+    // Any strategy other than nnet::latency takes the resource path.
+    if (CONFIG_T::strategy == nnet::latency) {
+        pointwise_conv_2d_latency_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
+    } else {
+        pointwise_conv_2d_resource_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
+    }
+}
+
 }//end namespace
 
 #endif
diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_conv2d_latency.h b/hls4ml/templates/vivado/nnet_utils/nnet_conv2d_latency.h
@@ -282,7 +282,7 @@ void conv_2d_latency_cl(
 }//end conv2d
 
 template<class data_T, class res_T, typename CONFIG_T>
-void pointwise_conv_2d_cl(
+void pointwise_conv_2d_latency_cl(
     data_T data[CONFIG_T::in_height*CONFIG_T::in_width*CONFIG_T::n_chan],
     res_T  res[CONFIG_T::out_height*CONFIG_T::out_width*CONFIG_T::n_filt],
     typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt],

diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_conv2d_resource.h b/hls4ml/templates/vivado/nnet_utils/nnet_conv2d_resource.h
@@ -174,6 +174,32 @@ void im2col_2d_cl(
     }
 }
 
+// Gathers the n_chan input values that feed output position (row, col) into data_col.
+// The source position is (row * stride_height - pad_top, col * stride_width - pad_left);
+// the flat index into `data` is
+// input_row * in_width * n_chan + input_col * n_chan + channel.
+// If the source row or column lies outside the input, zeros are written instead.
+template<class data_T, typename CONFIG_T>
+void im2col_2d_pointwise_cl(
+    data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan],
+    data_T data_col[CONFIG_T::n_chan],
+    const int row,
+    const int col)
+{
+    // Write position in data_col; advanced exactly once per channel iteration.
+    int index = 0;
+    int input_row = -CONFIG_T::pad_top + row * CONFIG_T::stride_height;
+
+    ChannelLoop:
+    for (int channel = 0; channel < CONFIG_T::n_chan; channel++) {
+        #pragma HLS UNROLL
+        if (input_row < 0 || input_row >= CONFIG_T::in_height) {
+            // Source row is outside the input: emit zero.
+            data_col[index++] = 0;
+        } else {
+            // Does not depend on `channel`; recomputed in each iteration.
+            int input_col = -CONFIG_T::pad_left + col * CONFIG_T::stride_width;
+            if (input_col >= 0 && input_col < CONFIG_T::in_width) {
+                data_col[index++] = data[input_row * CONFIG_T::in_width * CONFIG_T::n_chan + input_col * CONFIG_T::n_chan + channel];
+            } else {
+                // Source column is outside the input: emit zero.
+                data_col[index++] = 0;
+            }
+        }
+    }
+}
+
 template<class data_T, class res_T, typename CONFIG_T>
 void conv_2d_resource_cl(
     data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan],
@@ -212,5 +238,47 @@ void conv_2d_resource_cl(
     }
 }
 
+
+// Resource-strategy implementation of the pointwise (1x1 kernel) 2D convolution.
+// For every output position it gathers the input channels with
+// im2col_2d_pointwise_cl, runs `dense` on them using CONFIG_T::mult_config,
+// and copies the n_filt results into `res`.
+//   data    - input array of in_height * in_width * n_chan elements
+//   res     - output array of out_height * out_width * n_filt elements
+//   weights - n_chan * n_filt weights
+//   biases  - n_filt biases
+template<class data_T, class res_T, typename CONFIG_T>
+void pointwise_conv_2d_resource_cl(
+    data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan],
+    res_T  res[CONFIG_T::out_height * CONFIG_T::out_width * CONFIG_T::n_filt],
+    typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt],
+    typename CONFIG_T::bias_t   biases[CONFIG_T::n_filt]
+)
+{
+    assert(CONFIG_T::filt_height == 1 && CONFIG_T::filt_width == 1);
+
+    // These constants are referenced only by the commented-out pragmas below.
+    const int nin = CONFIG_T::n_chan;
+    const int nout = CONFIG_T::n_filt;
+    const int rufactor = CONFIG_T::reuse_factor;
+    const int block_factor = DIV_ROUNDUP(nin*nout, rufactor);
+
+    //#pragma HLS function_instantiate variable=weights,biases
+    //#pragma HLS RESOURCE         variable=weights core=RAM_2P_BRAM Commenting out the designation HLS seems to choose correctly
+    //#pragma HLS ARRAY_RESHAPE   variable=weights block factor=block_factor
+    //#pragma HLS ARRAY_PARTITION variable=biases complete
+
+    // Per-position scratch buffers: gathered input channels and the dense output.
+    data_T data_col[CONFIG_T::n_chan];
+    res_T res_col[CONFIG_T::n_filt];
+
+    #pragma HLS ARRAY_PARTITION variable=data_col complete
+    #pragma HLS ARRAY_PARTITION variable=res_col complete
+
+    HeightLoop:
+    for (int i = 0; i < CONFIG_T::out_height; i++) {
+        WidthLoop:
+        for (int j = 0; j < CONFIG_T::out_width; j++) {
+            #pragma HLS PIPELINE
+            im2col_2d_pointwise_cl<data_T, CONFIG_T>(data, data_col, i, j);
+            // NOTE(review): pointwise_conv_1d_resource_cl calls dense_resource directly,
+            // while this calls dense; confirm that going through dense is intended here.
+            dense<data_T, res_T, typename CONFIG_T::mult_config>(data_col, res_col, weights, biases);
+            // Store the n_filt results for output position (i, j) contiguously in `res`.
+            FiltLoop:
+            for (int k = 0; k < CONFIG_T::n_filt; k++) {
+                res[i * CONFIG_T::out_width * CONFIG_T::n_filt + j * CONFIG_T::n_filt + k] = res_col[k];
+            }
+        }
+    }
+}
+
 }
 #endif
diff --git a/test/pytest/test_pointwiseconv.py b/test/pytest/test_pointwiseconv.py
@@ -0,0 +1,79 @@
+import pytest
+import hls4ml
+import tensorflow as tf
+import numpy as np
+from pathlib import Path
+from tensorflow.keras.layers import Conv1D, Conv2D
+from tensorflow.keras import backend as K
+
+# Directory containing this test file; generated hls4ml projects are written beneath it.
+test_root_path = Path(__file__).parent
+
+# Value lists fed to pytest.mark.parametrize in the tests below.
+padds_options = ['same', 'valid']
+chans_options = ['channels_last']
+io_type_options = ['io_parallel', 'io_stream']
+strides1d_options = [(1,), (2,)]
+strides2d_options = [(1, 1), (2, 2)]
+strategy_options = ['Latency', 'Resource']
+
+@pytest.mark.parametrize("chans", chans_options)
+@pytest.mark.parametrize("padds", padds_options)
+@pytest.mark.parametrize("strides", strides1d_options)
+@pytest.mark.parametrize("io_type", io_type_options)
+@pytest.mark.parametrize("strategy", strategy_options)
+def test_pointwiseconv1d(chans, padds, strides, io_type, strategy):
+    """Compare hls4ml against Keras for a single Conv1D layer with kernel size 1.
+
+    Builds a one-layer model (32 filters, no bias, input shape (28, 3)), converts it
+    with the given strategy and io_type, and checks that the converted layer's class
+    name contains 'Pointwise' and that predictions agree within atol=0.001.
+    """
+    model = tf.keras.models.Sequential()
+    input_shape = (28, 3)
+    model.add(Conv1D(filters=32,
+                     kernel_size=(1,),
+                     strides=strides,
+                     padding=padds,
+                     input_shape=input_shape,
+                     kernel_initializer='normal',
+                     use_bias=False,
+                     data_format=chans
+                     ))
+
+    model.compile(optimizer='adam', loss='mse')
+    # Unseeded random inputs: the data differs from run to run.
+    X_input = np.random.rand(100, *input_shape)
+    keras_prediction = model.predict(X_input)
+    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>')
+    config['Model']['Strategy'] = strategy
+    # One output directory per parameter combination.
+    output_dir = str(test_root_path / 'hls4mlprj_pointwise1d_{}_strides_{}_{}_padding_{}_{}'.format(chans, strides[0], padds, io_type, strategy))
+    hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir, io_type=io_type)
+    hls_model.compile()
+    hls_prediction = hls_model.predict(X_input).reshape(keras_prediction.shape)
+
+    # The second graph entry is expected to be the converted convolution layer.
+    assert 'Pointwise' in list(hls_model.graph.values())[1].class_name
+    np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
+
+@pytest.mark.parametrize("chans", chans_options)
+@pytest.mark.parametrize("padds", padds_options)
+@pytest.mark.parametrize("strides", strides2d_options)
+@pytest.mark.parametrize("io_type", io_type_options)
+@pytest.mark.parametrize("strategy", strategy_options)
+def test_pointwiseconv2d(chans, padds, strides, io_type, strategy):
+    """Compare hls4ml against Keras for a single Conv2D layer with a 1x1 kernel.
+
+    Builds a one-layer model (32 filters, no bias, input shape (28, 28, 3)), converts
+    it with the given strategy and io_type, and checks that the converted layer's
+    class name contains 'Pointwise' and that predictions agree within atol=0.001.
+    """
+    model = tf.keras.models.Sequential()
+    input_shape = (28, 28, 3)
+    model.add(Conv2D(filters=32,
+                     kernel_size=(1, 1),
+                     strides=strides,
+                     padding=padds,
+                     input_shape=input_shape,
+                     kernel_initializer='normal',
+                     use_bias=False,
+                     data_format=chans
+                     ))
+
+    model.compile(optimizer='adam', loss='mse')
+    # Unseeded random inputs: the data differs from run to run.
+    X_input = np.random.rand(100, *input_shape)
+    keras_prediction = model.predict(X_input)
+    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>')
+    config['Model']['Strategy'] = strategy
+    # Turn e.g. (2, 2) into '2_2' so it can be embedded in the directory name.
+    stride_cfg = str(strides).replace(', ', '_').replace('(', '').replace(')', '')
+    output_dir = str(test_root_path / 'hls4mlprj_pointwise2d_{}_strides_{}_{}_padding_{}_{}'.format(chans, stride_cfg, padds, io_type, strategy))
+    hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir, io_type=io_type)
+    hls_model.compile()
+    hls_prediction = hls_model.predict(X_input).reshape(keras_prediction.shape)
+
+    # The second graph entry is expected to be the converted convolution layer.
+    assert 'Pointwise' in list(hls_model.graph.values())[1].class_name
+    np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)