Argmax Softmax #627

Merged: 3 commits, Oct 27, 2022
12 changes: 12 additions & 0 deletions hls4ml/backends/fpga/passes/remove_softmax.py
@@ -0,0 +1,12 @@
from hls4ml.model.layers import Softmax
from hls4ml.model.optimizer.optimizer import OptimizerPass

class SkipSoftmax(OptimizerPass):
    def match(self, node):
        is_softmax = isinstance(node, Softmax)
        remove_softmax = node.get_attr('skip', False)
        return is_softmax and remove_softmax

    def transform(self, model, node):
        model.remove_node(node, rewire=True)
        return True
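
For context, a minimal usage sketch of how this pass gets triggered from the Python API, mirroring the new pytest added at the bottom of this PR; the toy model, the layer name 'softmax', the backend and the output directory are illustrative assumptions.

import tensorflow as tf
import hls4ml

# Toy Keras model whose trailing softmax we want dropped from the firmware (illustrative).
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(14, input_shape=(10,), name='dense'),
    tf.keras.layers.Activation('softmax', name='softmax'),
])
model.compile()

cfg = hls4ml.utils.config_from_keras_model(model, granularity='name')
cfg['LayerName']['softmax']['skip'] = True  # makes SkipSoftmax.match() return True for this node

hls_model = hls4ml.converters.convert_from_keras_model(
    model, hls_config=cfg, output_dir='hls4mlprj_skip_softmax', backend='Vivado')
hls_model.compile()  # the Softmax node is removed and its input is rewired to the model output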
1 change: 1 addition & 0 deletions hls4ml/backends/quartus/quartus_backend.py
@@ -62,6 +62,7 @@ def _register_flows(self):
optimization_passes = [
    'quartus:remove_final_reshape',
    'quartus:optimize_pointwise_conv',
    'quartus:skip_softmax'
]
optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)

1 change: 1 addition & 0 deletions hls4ml/backends/vivado/vivado_backend.py
@@ -52,6 +52,7 @@ def _register_flows(self):
optimization_passes = [
    'vivado:remove_final_reshape',
    'vivado:optimize_pointwise_conv',
    'vivado:skip_softmax'
]
optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)

3 changes: 2 additions & 1 deletion hls4ml/model/layers.py
@@ -697,7 +697,8 @@ def initialize(self):

class Softmax(Activation):
    _expected_attributes = [
        ChoiceAttribute('implementation', ['latency', 'stable', 'legacy'], default='stable')
        ChoiceAttribute('implementation', ['latency', 'stable', 'argmax', 'legacy'], default='stable'),
        Attribute('skip', value_type=bool, default=False),
    ]

    def initialize(self):
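As an aside, a hedged sketch of how these attributes are read and written on an already-converted model graph; get_attr/set_attr are the existing Layer accessors (the same API the SkipSoftmax pass above relies on), and hls_model is an assumed hls4ml model object, not something defined in this PR.

from hls4ml.model.layers import Softmax

for layer in hls_model.get_layers():  # hls_model: assumed, previously converted hls4ml model
    if isinstance(layer, Softmax):
        print(layer.get_attr('implementation'))  # one of 'latency', 'stable', 'argmax', 'legacy'
        layer.set_attr('implementation', 'argmax')  # request the new one-hot argmax kernel
        layer.set_attr('skip', False)  # True would let the SkipSoftmax pass remove the node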
27 changes: 25 additions & 2 deletions hls4ml/templates/quartus/firmware/nnet_utils/nnet_activation.h
@@ -20,7 +20,6 @@
#ifndef NNET_ACTIVATION_H_
#define NNET_ACTIVATION_H_

//#include <cmath>
#include "nnet_common.h"

namespace nnet {
@@ -127,7 +126,7 @@ void sigmoid(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in])
// Softmax Activation
// *************************************************

enum class softmax_implementation {latency=0, legacy=1, stable=2};
enum class softmax_implementation {latency=0, legacy=1, stable=2, argmax=3};

template<class data_T, typename CONFIG_T>
inline unsigned softmax_stable_idx_from_real_val(const data_T x){
@@ -242,6 +241,27 @@ void softmax_legacy(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {
}
}

template<class data_T, class res_T, typename CONFIG_T>
void softmax_argmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {
    #pragma unroll
    for (int i = 0; i < CONFIG_T::n_in; i++) {
        res[i] = (res_T) 0;
    }

    hls_register data_T maximum = data[0];
    hls_register int idx = 0;

    #pragma ii 1
    for (int i = 1; i < CONFIG_T::n_in; i++) {
        if (data[i] > maximum) {
            maximum = data[i];
            idx = i;
        }
    }

    res[idx] = (res_T) 1;
}

template<class data_T, class res_T, typename CONFIG_T>
inline void softmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]){
switch(CONFIG_T::implementation) {
@@ -257,6 +277,9 @@ inline void softmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]){
default:
softmax_stable<data_T, res_T, CONFIG_T>(data, res);
break;
case softmax_implementation::argmax:
softmax_argmax<data_T, res_T, CONFIG_T>(data, res);
break;
}
}

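Functionally, the softmax_argmax kernels added in this PR (Quartus and Vivado, parallel and streaming) write a one-hot vector at the index of the largest input instead of computing the exponential-normalised softmax. A small NumPy sketch of that behaviour, purely for illustration:

import numpy as np

def softmax_argmax_reference(x):
    # One-hot at the position of the (first) maximum, matching the '>' comparison in the HLS code.
    out = np.zeros_like(x)
    out[np.argmax(x)] = 1
    return out

print(softmax_argmax_reference(np.array([0.1, 2.5, -0.3, 2.5])))  # [0. 1. 0. 0.]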
hls4ml/templates/quartus/firmware/nnet_utils/nnet_activation_stream.h
@@ -417,6 +417,34 @@ void softmax_legacy(stream<data_T> &data, stream<res_T> &res) {
}
}

template<class data_T, class res_T, typename CONFIG_T>
void softmax_argmax(stream<data_T> &data, stream<res_T> &res) {
    #pragma ii 1
    for (int i = 0; i < CONFIG_T::n_in / res_T::size; i++) {
        data_T in_data = data.read();
        res_T out_data;

        #pragma unroll
        for (int i = 0; i < res_T::size; i++) {
            out_data[i] = (typename res_T::value_type) 0;
        }

        hls_register typename data_T::value_type maximum = in_data[0];
        hls_register int idx = 0;

        #pragma ii 1
        for (int i = 1; i < res_T::size; i++) {
            if (in_data[i] > maximum) {
                maximum = in_data[i];
                idx = i;
            }
        }

        out_data[idx] = (typename res_T::value_type) 1;
        res.write(out_data);
    }
}

template<class data_T, class res_T, typename CONFIG_T>
void softmax(stream<data_T> &data, stream<res_T> &res) {
switch(CONFIG_T::implementation) {
@@ -429,6 +457,9 @@ void softmax(stream<data_T> &data, stream<res_T> &res) {
case softmax_implementation::legacy:
softmax_legacy<data_T, res_T, CONFIG_T>(data, res);
break;
case softmax_implementation::argmax:
softmax_argmax<data_T, res_T, CONFIG_T>(data, res);
break;
default:
softmax_stable<data_T, res_T, CONFIG_T>(data, res);
break;
28 changes: 26 additions & 2 deletions hls4ml/templates/vivado/nnet_utils/nnet_activation.h
@@ -155,7 +155,7 @@ void sigmoid(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in])
// Softmax Activation
// *************************************************

enum class softmax_implementation {latency=0, legacy=1, stable=2};
enum class softmax_implementation {latency=0, legacy=1, stable=2, argmax=3};

inline float exp_fcn_float(float input) {
return std::exp(input);
@@ -382,6 +382,27 @@ void softmax_legacy(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in])

}

template<class data_T, class res_T, typename CONFIG_T>
void softmax_argmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {
    for (int i = 0; i < CONFIG_T::n_in; i++) {
        #pragma HLS UNROLL
        res[i] = (res_T) 0;
    }

    data_T maximum = data[0];
    int idx = 0;

    for (int i = 1; i < CONFIG_T::n_in; i++) {
        #pragma HLS PIPELINE
        if (data[i] > maximum) {
            maximum = data[i];
            idx = i;
        }
    }

    res[idx] = (res_T) 1;
}

template<class data_T, class res_T, typename CONFIG_T>
void softmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]){
#pragma HLS inline
@@ -395,6 +416,9 @@ void softmax(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]){
case softmax_implementation::legacy:
softmax_legacy<data_T, res_T, CONFIG_T>(data, res);
break;
case softmax_implementation::argmax:
softmax_argmax<data_T, res_T, CONFIG_T>(data, res);
break;
}
}

@@ -776,4 +800,4 @@ void ternary_tanh(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in])

}

#endif
#endif
37 changes: 34 additions & 3 deletions hls4ml/templates/vivado/nnet_utils/nnet_activation_stream.h
@@ -314,6 +314,35 @@ void softmax_legacy(hls::stream<data_T> &data, hls::stream<res_T> &res) {
}
}

template<class data_T, class res_T, typename CONFIG_T>
void softmax_argmax(hls::stream<data_T> &data, hls::stream<res_T> &res) {
    for (int i = 0; i < CONFIG_T::n_in / res_T::size; i++) {
        #pragma HLS PIPELINE
        data_T in_data = data.read();
        res_T out_data;

        for (int i = 0; i < res_T::size; i++) {
            #pragma HLS UNROLL
            out_data[i] = (typename res_T::value_type) 0;
        }

        typename data_T::value_type maximum = in_data[0];
        int idx = 0;

        for (int i = 1; i < res_T::size; i++) {
            #pragma HLS PIPELINE
            if (in_data[i] > maximum) {
                maximum = in_data[i];
                idx = i;
            }
        }

        out_data[idx] = (typename res_T::value_type) 1;
        res.write(out_data);
    }
}


template<class data_T, class res_T, typename CONFIG_T>
void softmax(hls::stream<data_T> &data, hls::stream<res_T> &res){
assert(CONFIG_T::axis == -1);
@@ -328,7 +357,10 @@ void softmax(hls::stream<data_T> &data, hls::stream<res_T> &res){
case softmax_implementation::legacy:
softmax_legacy<data_T, res_T, CONFIG_T>(data, res);
break;
}
case softmax_implementation::argmax:
softmax_argmax<data_T, res_T, CONFIG_T>(data, res);
break;
}
}

// *************************************************
@@ -681,7 +713,6 @@ void ternary_tanh(hls::stream<data_T> &data, hls::stream<res_T> &res) {
}



}

#endif
#endif
28 changes: 27 additions & 1 deletion test/pytest/test_softmax.py
Expand Up @@ -24,7 +24,7 @@ def generate_data(function, input_shape):
    return function((1000, *input_shape))

@pytest.mark.parametrize('backend', ['Vivado', 'Quartus'])
@pytest.mark.parametrize('strategy', ['stable'])
@pytest.mark.parametrize('strategy', ['stable', 'argmax'])
@pytest.mark.parametrize('function,input_shape,io_type', [
    (flat_distribution, (8,), 'io_parallel'),
    (high_accuracy_distribution, (8,), 'io_parallel'),
@@ -57,3 +57,29 @@ def test_softmax(backend, strategy, generate_data, input_shape, io_type, function):
    print('Accuracy hls4ml relative to keras: {}'.format(acc_hls4ml))

    assert acc_hls4ml >= 0.98

@pytest.mark.parametrize('backend', ['Vivado', 'Quartus'])
@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream'])
def test_softmax_skipped(backend, io_type):
    X = np.random.rand(100, 10)
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(14, input_shape=(10, ), name='dense'))
    model.add(tf.keras.layers.Activation(activation='softmax', name='softmax'))
    model.compile()

    cfg = hls4ml.utils.config_from_keras_model(model, granularity='name')
    cfg['LayerName']['softmax']['skip'] = True

    odir = str(test_root_path / 'hls4mlprj_softmax_skipped_{}_{}').format(backend, io_type)
    hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=cfg, io_type=io_type, output_dir=odir, backend=backend)
    hls_model.compile()

    # Verify Softmax was removed
    hls_layers = list(hls_model.get_layers())  # 0 is Input, 1 is Dense, 2 is Softmax (if not removed)
    assert len(hls_layers) == 2

    # Verify hls4ml output is equal to Dense output
    y_keras = model.predict(X)
    y_hls4ml = hls_model.predict(X).reshape(y_keras.shape)
    keras_trace = hls4ml.model.profiling.get_ymodel_keras(model, X)
    np.testing.assert_allclose(y_hls4ml, keras_trace['dense'], rtol=0, atol=2e-2)
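
For completeness, one way to run just the new cases locally; the -k selection expression and the assumption of running from the repository root are mine, not part of the PR.

import pytest

# Select the argmax-softmax parametrisations and the new skipped-softmax test,
# assuming the hls4ml test requirements are installed.
pytest.main(['test/pytest/test_softmax.py', '-k', 'argmax or skipped', '-v'])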