Add RISC-V V support (tesseract-ocr#4346)

sunyuechi · stweil · stweil · commit 16fc9d90a4f8 · 2024-11-08T08:09:01.000+01:00
Benchmark: convert riscv-v-spec-1.0.pdf into 111 PNG images,
run OCR on each image in sequence,
and measure the total run time on banana_f3:

old:        31m16.267s
new:        16m51.155s

Co-authored-by: sunyuechi <sunyuechi@iscas.ac.cn>
Co-authored-by: Stefan Weil <sw@weilnetz.de>
diff --git a/Makefile.am b/Makefile.am
@@ -199,6 +199,15 @@ libtesseract_la_LIBADD += libtesseract_neon.la
 noinst_LTLIBRARIES += libtesseract_neon.la
 endif
 
+# RVV code is built as a separate noinst libtool library with its own CXXFLAGS.
+if HAVE_RVV
+libtesseract_rvv_la_CXXFLAGS = $(RVV_CXXFLAGS) -O3
+libtesseract_rvv_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
+libtesseract_rvv_la_SOURCES = src/arch/intsimdmatrixrvv.cpp
+noinst_LTLIBRARIES += libtesseract_rvv.la
+libtesseract_la_LIBADD += libtesseract_rvv.la
+endif
+
 libtesseract_la_SOURCES += src/arch/intsimdmatrix.cpp
 libtesseract_la_SOURCES += src/arch/simddetect.cpp
 
diff --git a/configure.ac b/configure.ac
@@ -131,6 +131,7 @@ AM_CONDITIONAL([HAVE_AVX512F], false)
 AM_CONDITIONAL([HAVE_FMA], false)
 AM_CONDITIONAL([HAVE_SSE4_1], false)
 AM_CONDITIONAL([HAVE_NEON], false)
+AM_CONDITIONAL([HAVE_RVV], false)
 
 case "${host_cpu}" in
 
@@ -188,6 +189,16 @@ case "${host_cpu}" in
 
     ;;
 
+  riscv*)
+    # Toolchain probe only. NOTE(review): RVV_CXXFLAGS used by Makefile.am is not set in this arm; confirm.
+    AX_CHECK_COMPILE_FLAG([-march=rv64gcv], [rvv=true], [rvv=false], [$WERROR])
+    AM_CONDITIONAL([HAVE_RVV], [$rvv])
+    if $rvv; then
+      AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions])
+      check_for_rvv=1
+    fi
+    ;;
+
   *)
 
     AC_MSG_WARN([No compiler options for $host_cpu])
@@ -207,6 +218,16 @@ if test x$check_for_neon = x1; then
   fi
 fi
 
+# Additional checks for RVV targets: simddetect.cpp reads AT_HWCAP with getauxval.
+if test x$check_for_rvv = x1; then
+  AC_MSG_NOTICE([checking how to detect RVV availability])
+  AC_CHECK_FUNCS([getauxval])
+  # Without getauxval the run-time check is not compiled and RVV stays unused.
+  if test $ac_cv_func_getauxval = no; then
+      AC_MSG_WARN([RVV is available, but we don't know how to check for it.  Will not be able to use RVV.])
+  fi
+fi
+
 AX_CHECK_COMPILE_FLAG([-fopenmp-simd], [openmp_simd=true], [openmp_simd=false], [$WERROR])
 AM_CONDITIONAL([OPENMP_SIMD], $openmp_simd)
 
diff --git a/src/arch/intsimdmatrix.h b/src/arch/intsimdmatrix.h
@@ -115,6 +115,8 @@ struct TESS_API IntSimdMatrix {
   static const IntSimdMatrix *intSimdMatrix;
   // Only available with NEON.
   static const IntSimdMatrix intSimdMatrixNEON;
+  // Only available with RVV.
+  static const IntSimdMatrix intSimdMatrixRVV;
   // Only available with AVX2 / AVX / FMA / SSE.
   static const IntSimdMatrix intSimdMatrixAVX2;
   static const IntSimdMatrix intSimdMatrixSSE;
diff --git a/src/arch/intsimdmatrixrvv.cpp b/src/arch/intsimdmatrixrvv.cpp
@@ -0,0 +1,112 @@
+///////////////////////////////////////////////////////////////////////
+// File:        intsimdmatrixrvv.cpp
+// Description: matrix-vector product for 8-bit data on rvv.
+// Author:      sunyuechi
+//
+// Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h" // for HAVE_RVV, ...
+#endif
+
+#if HAVE_RVV
+#  include "intsimdmatrix.h"
+#  include "tesstypes.h"
+
+namespace tesseract {
+
+// Computes the dot product of the num-element int8_t vectors u and v
+// with RVV instructions.
+//
+// Each loop pass handles the number of elements that vsetvli grants for
+// e8/m2 (returned in t0): the 8-bit inputs are multiplied into 16-bit
+// products in v8, which are widened and added to the 32-bit partial sums
+// in v0..v7. The "tu" policy keeps the partial sums beyond a shorter last
+// pass intact. After the loop the partial sums are reduced to one value.
+static int DotProduct(const int8_t *u, const int8_t *v, int num) {
+  int total = 0;
+
+  // Nothing to sum up. A negative num must not reach vsetvli, which would
+  // read it as a huge unsigned element count.
+  if (num <= 0) {
+    return total;
+  }
+
+  asm __volatile__ (
+    "  .option       push                       \n\t"
+    "  .option       arch, +v                   \n\t"
+    "  vsetvli t0,zero,e32,m8,ta,ma             \n\t"
+    "  vmv.v.i v0,0                             \n\t"
+    "1:                                         \n\t"
+    "  vsetvli t0,%[num],e8,m2,ta,ma            \n\t"
+    "  vle8.v v16,0(%[u])                       \n\t"
+    "  vle8.v v24,0(%[v])                       \n\t"
+    "  sub %[num],%[num],t0                     \n\t"
+    "  vwmul.vv v8,v24,v16                      \n\t"
+    "  add %[u],%[u],t0                         \n\t"
+    "  add %[v],%[v],t0                         \n\t"
+    "  vsetvli zero,zero,e16,m4,tu,ma           \n\t"
+    "  vwadd.wv v0,v0,v8                        \n\t"
+    "  bnez %[num],1b                           \n\t"
+    "  vsetvli t0,zero,e32,m8,ta,ma             \n\t"
+    "  vmv.s.x v8,zero                          \n\t"
+    "  vredsum.vs v0,v0,v8                      \n\t"
+    "  vmv.x.s %[total],v0                      \n\t"
+    "  .option       pop                        \n\t"
+    :  [u] "+r" (u),
+       [v] "+r" (v),
+       [num] "+r" (num),
+       [total] "+r" (total)
+    :
+    // t0 and every vector register written above are listed so that the
+    // compiler does not keep live values in them across the asm.
+    :  "cc", "memory", "t0",
+       "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+       "v8", "v9", "v10", "v11", "v16", "v17", "v24", "v25"
+  );
+
+  return total;
+}
+
+// Multiplies the int8_t weight matrix wi by the input vector u.
+// wi holds dim1 rows of dim2 weights each. The last weight of a row is the
+// bias, which is multiplied by INT8_MAX instead of by an element of u.
+// The sum for row i is scaled by scales[i] and stored in v[i].
+static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const TFloat *scales,
+                            const int8_t *u, TFloat *v) {
+  int num_out = dim1;
+  int num_in = dim2 - 1;
+  for (int i = 0; i < num_out; ++i) {
+    const int8_t *wi_start = wi + i * dim2;
+    int total = DotProduct(wi_start, u, num_in);
+    // Add in the bias and apply scaling.
+    v[i] = (total + wi_start[num_in] * INT8_MAX) * scales[i];
+  }
+}
+
+const IntSimdMatrix IntSimdMatrix::intSimdMatrixRVV = {
+    // Function.
+    matrixDotVector,
+    // Number of 32 bit outputs held in each register.
+    1,
+    // Maximum number of registers that we will use to hold outputs.
+    1,
+    // Number of 8 bit inputs in the inputs register.
+    1,
+    // Number of inputs in each weight group.
+    1
+};
+
+} // namespace tesseract.
+
+#endif /* HAVE_RVV */
diff --git a/src/arch/simddetect.cpp b/src/arch/simddetect.cpp
@@ -65,6 +65,13 @@
 #  endif
 #endif
 
+#if defined(HAVE_RVV)
+#  if defined(HAVE_GETAUXVAL)
+#    include <sys/auxv.h>
+#    define HWCAP_RV(letter) (1ul << ((letter) - 'A'))
+#  endif
+#endif
+
 namespace tesseract {
 
 // Computes and returns the dot product of the two n-vectors u and v.
@@ -89,6 +96,8 @@ bool SIMDDetect::neon_available_ = true;
 #elif defined(HAVE_NEON)
 // If true, then Neon has been detected.
 bool SIMDDetect::neon_available_;
+#elif defined(HAVE_RVV)
+bool SIMDDetect::rvv_available_;
 #else
 // If true, then AVX has been detected.
 bool SIMDDetect::avx_available_;
@@ -229,6 +238,13 @@ SIMDDetect::SIMDDetect() {
   elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
   neon_available_ = hwcap & HWCAP_NEON;
 #  endif
+#endif
+
+#if defined(HAVE_RVV)
+#  if defined(HAVE_GETAUXVAL)
+  const unsigned long hwcap = getauxval(AT_HWCAP);
+  rvv_available_ = hwcap & HWCAP_RV('V');
+#  endif
 #endif
 
   // Select code for calculation of dot product based on autodetection.
@@ -258,6 +274,10 @@ SIMDDetect::SIMDDetect() {
   } else if (neon_available_) {
     // NEON detected.
     SetDotProduct(DotProductNEON, &IntSimdMatrix::intSimdMatrixNEON);
+#endif
+#if defined(HAVE_RVV)
+  } else if (rvv_available_) {
+    SetDotProduct(DotProductGeneric, &IntSimdMatrix::intSimdMatrixRVV);
 #endif
   }
 
diff --git a/src/arch/simddetect.h b/src/arch/simddetect.h
@@ -63,6 +63,10 @@ class SIMDDetect {
   static inline bool IsNEONAvailable() {
     return detector.neon_available_;
   }
+  // Returns true if the RISC-V Vector extension (RVV) was detected on this system.
+  static inline bool IsRVVAvailable() {
+    return detector.rvv_available_;
+  }
 
   // Update settings after config variable was set.
   static TESS_API void Update();
@@ -86,6 +90,8 @@ class SIMDDetect {
   static TESS_API bool sse_available_;
   // If true, then NEON has been detected.
   static TESS_API bool neon_available_;
+  // If true, then RVV has been detected.
+  static TESS_API bool rvv_available_;
 };
 
 } // namespace tesseract
diff --git a/src/tesseract.cpp b/src/tesseract.cpp
@@ -112,6 +112,9 @@ static void PrintVersionInfo() {
 #if defined(HAVE_NEON) || defined(__aarch64__)
   if (tesseract::SIMDDetect::IsNEONAvailable())
     printf(" Found NEON\n");
+#elif defined(HAVE_RVV)
+  if (tesseract::SIMDDetect::IsRVVAvailable())
+    printf(" Found RVV\n");
 #else
   if (tesseract::SIMDDetect::IsAVX512BWAvailable()) {
     printf(" Found AVX512BW\n");