openvinotoolkit · chux0519 · May 21, 2024 · May 28, 2024 · May 28, 2024 · May 28, 2024
diff --git a/image_generation/common/detectors/.gitignore b/image_generation/common/detectors/.gitignore
@@ -0,0 +1,8 @@
+model
+*.png
+*.bmp
+*.txt
+*.jpg
+*.xml
+*.bin
+!requirements.txt
diff --git a/image_generation/common/detectors/CMakeLists.txt b/image_generation/common/detectors/CMakeLists.txt
@@ -0,0 +1,51 @@
+# Copyright (C) 2023-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+cmake_minimum_required(VERSION 3.15)
+
+# Every .cpp file under src/ is compiled into the single static library `detectors`.
+file(GLOB_RECURSE "detectors_SRC" "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp")
+
+add_library(detectors STATIC ${detectors_SRC})
+add_library(detectors::detectors ALIAS detectors)
+
+target_include_directories(detectors
+    PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include"
+    PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")
+
+if(CMAKE_COMPILER_IS_GNUCXX)
+    # NOTE(review): PUBLIC propagates -march=native and -Wall to every target that links
+    # `detectors`, and -march=native ties the binaries to the build machine's CPU.
+    # Consider PRIVATE (or dropping -march=native) if portable binaries are needed.
+    target_compile_options(detectors PUBLIC -march=native -Wall)
+endif()
+
+# dependencies
+
+find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+add_subdirectory(../../common/imwrite _deps/imwrite)
+
+# opencv is only for resize now
+find_package(OpenCV 4.9.0 REQUIRED)
+# Attach the OpenCV headers to the target instead of to the whole directory; PUBLIC makes
+# them reach detectors_test and detectors_bridge through their link to `detectors`.
+target_include_directories(detectors PUBLIC ${OpenCV_INCLUDE_DIRS})
+
+target_link_libraries(detectors PUBLIC openvino::runtime imwrite::imwrite ${OpenCV_LIBS})
+
+include(FetchContent)
+
+# gtest
+# Pinned to a release tag so that a build does not change when upstream `main` moves.
+# Update the tag deliberately when a newer googletest is wanted.
+FetchContent_Declare(
+    googletest
+    URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip
+)
+# For Windows: Prevent overriding the parent project's compiler/linker settings
+set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+FetchContent_MakeAvailable(googletest)
+include(GoogleTest)
+
+# Unit tests; gtest_main supplies main().
+add_executable(detectors_test detectors_test.cpp)
+target_link_libraries(detectors_test PRIVATE detectors gtest_main)
+gtest_discover_tests(detectors_test)
+
+# Command-line tool that runs the detector on a single image.
+add_executable(detectors_bridge detectors_bridge.cpp)
+target_link_libraries(detectors_bridge PRIVATE detectors)
diff --git a/image_generation/common/detectors/README.md b/image_generation/common/detectors/README.md
@@ -0,0 +1,10 @@
+## Detectors
+
+`Detectors` is a C++ static library that encapsulates implementations of the detectors from the [controlnet_aux](https://github.com/huggingface/controlnet_aux) library. It is designed to facilitate the integration of ControlNet support within Stable Diffusion.
+
+The library is written to operate with OpenVINO C++ API objects like `ov::Model` and `ov::Tensor` and can be used in deployment scenarios with OpenVINO Runtime on Edge.
+
+### Functionality
+
+The library contains ports of the following detectors:
+- [OpenposeDetector](https://github.com/huggingface/controlnet_aux/blob/6367d57749002a76900a4fc26c06b82b34f495f7/src/controlnet_aux/open_pose/__init__.py#L70C7-L70C23)
diff --git a/image_generation/common/detectors/detectors_bridge.cpp b/image_generation/common/detectors/detectors_bridge.cpp
@@ -0,0 +1,49 @@
+#include <opencv2/opencv.hpp>
+
+#include "imwrite.hpp"
+#include "openpose_detector.hpp"
+#include "utils.hpp"
+
+// Writes a one-line synopsis of the expected command-line flags to stderr.
+// `program_name` is echoed verbatim after "Usage: ".
+void print_usage(const char* program_name) {
+    std::cerr << "Usage: " << program_name;
+    std::cerr << " -i <input_image> -o <output_image> -m <model>" << std::endl;
+}
+
+// Command-line entry point: runs the OpenPose detector on one image and saves the image
+// returned by the detector.
+//
+// Flags (all three are required, each must be followed by a value):
+//   -i <input_image>   image file to read
+//   -o <output_image>  file the resulting image is written to
+//   -m <model>         directory that contains openpose.xml
+//
+// Returns 0 on success. Returns 1 after printing the usage line when the arguments are
+// malformed, or after printing the error when any processing step throws.
+int main(int argc, char* argv[]) {
+    std::string input_image;
+    std::string output_image;
+    std::string model_path;
+
+    for (int i = 1; i < argc; ++i) {
+        const std::string flag = argv[i];
+        // Every flag needs a value after it, so a flag in the last position is an error,
+        // exactly like an unknown flag.
+        const bool has_value = i + 1 < argc;
+        if (has_value && flag == "-i") {
+            input_image = argv[++i];
+        } else if (has_value && flag == "-o") {
+            output_image = argv[++i];
+        } else if (has_value && flag == "-m") {
+            model_path = argv[++i];
+        } else {
+            print_usage(argv[0]);
+            return 1;
+        }
+    }
+
+    if (input_image.empty() || output_image.empty() || model_path.empty()) {
+        print_usage(argv[0]);
+        return 1;
+    }
+
+    // Reading the image, loading the model, inference and writing the result can all
+    // throw; report the reason and exit with a failure code instead of terminating.
+    try {
+        auto ori_img = read_image_to_tensor(input_image);
+
+        std::cout << "Input image tensor shape: " << ori_img.get_shape() << std::endl;
+
+        OpenposeDetector detector;
+        detector.load(model_path + "/openpose.xml");
+
+        // forward, get subset and candidate
+        std::vector<std::vector<float>> subset;
+        std::vector<std::vector<float>> candidate;
+        auto result = detector.forward(ori_img, subset, candidate);
+
+        // Announce completion only after the file has actually been written.
+        imwrite(output_image, result, true);
+
+        std::cout << "[DONE] result: " << output_image << std::endl;
+    } catch (const std::exception& e) {
+        std::cerr << "Error: " << e.what() << std::endl;
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/image_generation/common/detectors/detectors_test.cpp b/image_generation/common/detectors/detectors_test.cpp
@@ -0,0 +1,101 @@
+#include <gtest/gtest.h>
+
+#include "imwrite.hpp"
+#include "openpose_detector.hpp"
+#include "utils.hpp"
+
+// Exercises the image helpers (read, resize, pad, crop) on a fixed 768x512 three-channel image.
+// Apart from the input-size check there are no value assertions: every intermediate image is
+// written to a .bmp file in the working directory so that it can be inspected by eye.
+// Requires model/openpose.xml and scripts/im.txt relative to the working directory.
+TEST(OpenposeDetectorTest, UtilsFunction) {
+    // NOTE(review): the detector is loaded but not used by the helpers below; it only
+    // checks that the model file can be loaded.
+    OpenposeDetector detector;
+    detector.load("model/openpose.xml");
+
+    auto input_image = "scripts/im.txt";
+
+    // Set up initial parameters
+    int stride = 8;
+    int pad_val = 128;
+
+    unsigned long H = 768;
+    unsigned long W = 512;
+    unsigned long C = 3;
+
+    // functional tests
+    // Load Image
+    std::cout << "Load " << input_image << std::endl;
+    std::vector<std::uint8_t> im_array = read_bgr_from_txt(input_image);
+
+    ov::Shape img_shape = {1, H, W, C};  // NHWC
+    ov::Tensor img_tensor(ov::element::u8, img_shape);
+
+    // The copy below writes im_array.size() bytes into a u8 buffer of exactly H * W * C
+    // elements. A fixture of any other size must fail here rather than overrun the tensor
+    // (too long) or leave part of it uninitialized (too short).
+    ASSERT_EQ(im_array.size(), static_cast<std::size_t>(H * W * C));
+
+    // validate the read function
+    std::uint8_t* tensor_data = img_tensor.data<std::uint8_t>();
+    std::copy(im_array.begin(), im_array.end(), tensor_data);
+    std::cerr << "Tensor shape: " << img_tensor.get_shape() << std::endl;
+    imwrite(std::string("im.bmp"), img_tensor, false);
+
+    // validate the resize function
+    ov::Tensor small_img_tensor = smart_resize_k(img_tensor, 0.5, 0.5);
+    imwrite(std::string("im.half.bmp"), small_img_tensor, false);
+
+    ov::Tensor big_img_tensor = smart_resize_k(img_tensor, 2, 2);
+    imwrite(std::string("im.double.bmp"), big_img_tensor, false);
+
+    // 761 and 505 are not multiples of the stride (8), so padding is actually needed.
+    ov::Tensor need_pad_img_tensor = smart_resize(img_tensor, 761, 505);
+    auto [img_padded, pad] = pad_right_down_corner(need_pad_img_tensor, stride, pad_val);
+    imwrite(std::string("im.paded.bmp"), img_padded, false);
+
+    auto img_cropped = crop_right_down_corner(img_padded, pad);
+    imwrite(std::string("im.cropped.bmp"), img_cropped, false);
+}
+
+// Runs the full detector on a fixed 768x512 three-channel image and dumps the results:
+// the returned image goes to im.result.bmp, and `candidate` / `subset` are printed to
+// stdout and saved to candidate.txt / subset.txt for comparison in Python.
+// Requires model/openpose.xml and scripts/im.txt relative to the working directory.
+TEST(OpenposeDetectorTest, ForwardFunction) {
+    OpenposeDetector detector;
+    detector.load("model/openpose.xml");
+
+    unsigned long H = 768;
+    unsigned long W = 512;
+    unsigned long C = 3;
+
+    // read image from ndarray
+    auto input_image = "scripts/im.txt";
+    std::vector<std::uint8_t> input_array = read_bgr_from_txt(input_image);
+    ov::Tensor ori_img(ov::element::u8, {1, H, W, C});
+
+    // The tensor holds exactly H * W * C u8 elements; refuse a fixture of any other size
+    // instead of overrunning or under-filling the buffer in the copy below.
+    ASSERT_EQ(input_array.size(), static_cast<std::size_t>(H * W * C));
+
+    std::uint8_t* input_data = ori_img.data<std::uint8_t>();
+    std::copy(input_array.begin(), input_array.end(), input_data);
+
+    // forward, get subset and candidate
+    std::vector<std::vector<float>> subset;
+    std::vector<std::vector<float>> candidate;
+    auto pos_result = detector.forward(ori_img, subset, candidate);
+    imwrite(std::string("im.result.bmp"), pos_result, true);
+
+    // Each candidate row is indexed at [0]..[3] below, so check the width once up front.
+    for (const auto& cand : candidate) {
+        ASSERT_GE(cand.size(), 4u);
+    }
+
+    // print results
+    for (const auto& cand : candidate) {
+        std::cout << "Candidate: " << cand[0] << " " << cand[1] << " " << cand[2] << " " << cand[3] << std::endl;
+    }
+
+    for (const auto& sub : subset) {
+        std::cout << "Subset: ";
+        for (const auto& s : sub) {
+            std::cout << s << " ";
+        }
+        std::cout << std::endl;
+    }
+
+    // save candidate into a text file
+    std::ofstream out("candidate.txt");
+    ASSERT_TRUE(out.is_open()) << "cannot open candidate.txt for writing";
+    for (const auto& cand : candidate) {
+        out << cand[0] << " " << cand[1] << " " << cand[2] << " " << cand[3] << std::endl;
+    }
+    out.close();
+
+    // save subset into a text file
+    out.open("subset.txt");
+    ASSERT_TRUE(out.is_open()) << "cannot open subset.txt for writing";
+    for (const auto& sub : subset) {
+        for (const auto& s : sub) {
+            out << s << " ";
+        }
+        out << std::endl;
+    }
+
+    // we inspect the results in python
+}
diff --git a/image_generation/common/detectors/include/log.hpp b/image_generation/common/detectors/include/log.hpp
@@ -0,0 +1,74 @@
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+// string
+
+// Prints `message` followed by a newline to stdout, but only when the DEBUG environment
+// variable is set (any value counts, the content is not inspected).
+inline void debugPrint(const std::string& message) {
+    if (std::getenv("DEBUG") == nullptr) {
+        return;
+    }
+    std::cout << message << std::endl;
+}
+
+// Prints `format_str` immediately followed by every extra argument, each one streamed
+// with operator<< and followed by a single space, then a newline.
+// Despite its name, `format_str` is not interpreted as a format: it is written verbatim
+// as a prefix. Nothing is printed unless the DEBUG environment variable is set.
+template <typename... Args>
+void debugPrint(const std::string& format_str, Args&&... args) {
+    if (std::getenv("DEBUG") == nullptr) {
+        return;
+    }
+    std::ostringstream joined;
+    const auto append = [&joined](const auto& value) { joined << value << " "; };
+    (append(args), ...);
+    const std::string suffix = joined.str();
+    std::cout << format_str << suffix << std::endl;
+}
+
+// vector
+// Prints the elements of `vec` as "[a, b, c]" followed by a newline (an empty vector
+// prints "[]"). Nothing is printed unless the DEBUG environment variable is set.
+template <typename T>
+void debugPrint(const std::vector<T>& vec) {
+    if (std::getenv("DEBUG") == nullptr) {
+        return;
+    }
+    std::cout << "[";
+    // The separator is empty before the first element and ", " before every later one.
+    const char* separator = "";
+    for (const auto& item : vec) {
+        std::cout << separator << item;
+        separator = ", ";
+    }
+    std::cout << "]" << std::endl;
+}
+
+// tuple
+// Prints element `Index` of tuple `t` to stdout, followed by ", " unless it is the last
+// element. The index is passed as a type (std::integral_constant) so that it can be used
+// with std::get at compile time.
+template <typename Tuple, std::size_t Index>
+void printTupleElement(const Tuple& t, std::integral_constant<std::size_t, Index>) {
+    constexpr bool is_last = (Index + 1 == std::tuple_size<Tuple>::value);
+    std::cout << std::get<Index>(t);
+    if constexpr (!is_last) {
+        std::cout << ", ";
+    }
+}
+
+// Prints the elements of `t` selected by the index pack `Is`, in pack order, by calling
+// printTupleElement once per index. An empty pack prints nothing.
+template <typename Tuple, std::size_t... Is>
+void printTuple(const Tuple& t, std::index_sequence<Is...>) {
+    // A fold over the comma operator evaluates its operands left to right.
+    (printTupleElement(t, std::integral_constant<std::size_t, Is>{}), ...);
+}
+
+// Prints tuple `t` to stdout as "(a, b, c)" with no trailing newline.
+template <typename... Args>
+void printTuple(const std::tuple<Args...>& t) {
+    using Indices = std::index_sequence_for<Args...>;
+    std::cout << "(";
+    printTuple(t, Indices{});
+    std::cout << ")";
+}
+
+// Prints tuple `t` as "(a, b, c)" followed by a newline.
+// Nothing is printed unless the DEBUG environment variable is set.
+template <typename... Args>
+void debugPrint(const std::tuple<Args...>& t) {
+    if (std::getenv("DEBUG") == nullptr) {
+        return;
+    }
+    printTuple(t);
+    std::cout << std::endl;
+}
diff --git a/image_generation/common/detectors/include/openpose_detector.hpp b/image_generation/common/detectors/include/openpose_detector.hpp
@@ -0,0 +1,59 @@
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <openvino/runtime/tensor.hpp>
+
+#include "openvino/runtime/compiled_model.hpp"
+
+// Pose detector that runs an OpenVINO compiled model (`body_model`) on an image tensor.
+// Only declarations live here; the member functions are defined in the library sources.
+class OpenposeDetector {
+public:
+    OpenposeDetector() = default;
+
+    // Loads the model from the given path. Must be called before inference()/forward().
+    // NOTE(review): presumably compiles the model into `body_model` — confirm in the .cpp.
+    void load(const std::string&);
+
+    // Runs the model on the given image tensor and returns two tensors.
+    // NOTE(review): presumably the averaged heatmap and the averaged PAF, matching the
+    // `heatmap_avg` / `paf_avg` parameters below — confirm the order in the .cpp.
+    std::pair<ov::Tensor, ov::Tensor> inference(const ov::Tensor&);
+
+    // Full pipeline for one image. `subset` and `candidate` are output parameters filled
+    // by the call; the return value is an image tensor.
+    // NOTE(review): presumably the rendered pose produced by render_pose() — confirm.
+    ov::Tensor forward(const ov::Tensor&,
+                       std::vector<std::vector<float>>& subset,
+                       std::vector<std::vector<float>>& candidate);
+
+private:
+    ov::CompiledModel body_model;
+    // Lookup tables shared by all instances; their values are defined in the .cpp.
+    static const std::vector<std::vector<int>> limbSeq;
+    static const std::vector<std::vector<int>> mapIdx;
+
+    // find the peaks from heatmap, returns a vector of tuple
+    // (x, y, score, id)
+    // The result is written to `all_peaks`; `thre1` is a threshold applied to the heatmap.
+    void find_heatmap_peaks(const ov::Tensor& heatmap_avg /* f32 */,
+                            float thre1,
+                            std::vector<std::vector<std::tuple<int, int, float, int>>>& all_peaks);
+
+    // Computes connections from the PAF tensor and the peaks found above.
+    // Outputs: `connection_all` and `special_k`. `thre2` is a threshold.
+    // NOTE(review): the meaning of the five tuple fields and of `special_k` is not
+    // visible from this header — document it next to the definition.
+    void calculate_connections(const ov::Tensor& paf_avg,
+                               const std::vector<std::vector<std::tuple<int, int, float, int>>>& all_peaks,
+                               const ov::Tensor& oriImg,
+                               const float thre2,
+                               std::vector<std::vector<std::tuple<int, int, float, int, int>>>& connection_all,
+                               std::vector<int>& special_k);
+
+    // Turns the peaks and connections into the `subset` and `candidate` outputs that
+    // forward() hands back to its caller.
+    void process_connections(const std::vector<std::vector<std::tuple<int, int, float, int>>>& all_peaks,
+                             const std::vector<std::vector<std::tuple<int, int, float, int, int>>>& connection_all,
+                             const std::vector<int>& special_k,
+                             std::vector<std::vector<float>>& subset,
+                             std::vector<std::vector<float>>& candidate);
+
+    // Produces an image tensor from the input image plus `subset` and `candidate`.
+    ov::Tensor render_pose(const ov::Tensor& image,
+                           const std::vector<std::vector<float>>& subset,
+                           const std::vector<std::vector<float>>& candidate);
+
+    // One detected point. Every member has a default so that a default-constructed
+    // Keypoint never carries indeterminate values.
+    struct Keypoint {
+        float x = 0.0f;
+        float y = 0.0f;
+        float score = 1.0f;
+        int id = -1;
+    };
+
+    // Keypoints of one body plus its aggregate score and part count; the counters start at zero.
+    struct BodyResult {
+        std::vector<Keypoint> keypoints;
+        float total_score = 0.0f;
+        int total_parts = 0;
+    };
+};