openvinotoolkit · chux0519 · May 21, 2024 · May 28, 2024 · May 28, 2024 · May 28, 2024
@@ -0,0 +1,6 @@
+# NOTE(review): presumably the converted model files (.xml/.bin pairs) — confirm
+*.xml
+*.bin
+# Directories named "models" or "images" at any depth below this file
+models/
+images/
+# Bitmap images and .ini files, matched by extension at any depth
+*.bmp
+*.ini
@@ -0,0 +1,44 @@
+# Copyright (C) 2023-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+cmake_minimum_required(VERSION 3.15)
+
+project(stable_diffusion_controlnet LANGUAGES CXX)
+
+# CMP0135 (CMake >= 3.24): give files extracted from a downloaded archive the
+# extraction time as their timestamp, so changing a URL triggers a rebuild.
+if(POLICY CMP0135)
+    cmake_policy(SET CMP0135 NEW)
+endif()
+
+set(CMAKE_CXX_STANDARD 20)
+
+# Default to a Release build on single-config generators.
+# project() has already created CMAKE_BUILD_TYPE as an (empty) cache entry, so a
+# plain `set(... CACHE ...)` here would be silently ignored. FORCE is only
+# reached when the user did not choose a build type themselves.
+get_property(is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
+if(NOT is_multi_config AND NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type" FORCE)
+endif()
+
+# dependencies
+
+find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+
+include(FetchContent)
+
+# Command-line option parser, pinned to a release archive and verified by hash.
+FetchContent_Declare(cxxopts
+    URL https://github.com/jarro2783/cxxopts/archive/refs/tags/v3.1.1.tar.gz
+    URL_HASH SHA256=523175f792eb0ff04f9e653c90746c12655f10cb70f1d5e6d6d9491420298a08)
+
+FetchContent_MakeAvailable(cxxopts)
+
+# In-repository components. The second argument is the binary directory; it is
+# given explicitly and is mandatory for the two paths outside this source tree.
+add_subdirectory(./detectors _deps/detectors)
+add_subdirectory(../../common/diffusers _deps/diffusers)
+add_subdirectory(../../../thirdparty/openvino_tokenizers/ _deps/tokenizers)
+
+# create executable
+add_executable(${PROJECT_NAME}
+    ${PROJECT_SOURCE_DIR}/src/core/core.cpp
+    ${PROJECT_SOURCE_DIR}/src/cli/cli.cpp)
+
+# Nothing links against the executable, so its include path is PRIVATE.
+target_include_directories(${PROJECT_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
+
+target_link_libraries(${PROJECT_NAME} PRIVATE
+    openvino::runtime
+    cxxopts::cxxopts
+    diffusers::diffusers
+    imwrite::imwrite
+    detectors::detectors)
+
+# openvino_tokenizers is not linked into the executable; only the path of its
+# built library is compiled in below (presumably so it can be loaded at
+# runtime), so it just has to be built before this target.
+add_dependencies(${PROJECT_NAME} openvino_tokenizers)
+
+target_compile_definitions(${PROJECT_NAME} PRIVATE TOKENIZERS_LIBRARY_PATH=\"$<TARGET_FILE:openvino_tokenizers>\")
@@ -0,0 +1,70 @@
+# Stable Diffusion 1.5 ControlNet Pipeline
+
+The pure C++ text-to-image and image-to-image pipeline, driven by the OpenVINO native C++ API for Stable Diffusion v1.5 with LMS Discrete Scheduler and Openpose.
+
+## Step 1: Setup Environment
+
+Prerequisites:
+- Conda ([installation guide](https://conda.io/projects/conda/en/latest/user-guide/install/index.html))
+
+C++ Packages:
+* [CMake](https://cmake.org/download/): Cross-platform build tool
+* [OpenVINO](https://docs.openvino.ai/install): Model inference. `master` and possibly the latest `releases/*` branch correspond to OpenVINO versions that have not been released yet. The nightly packages at https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for early testing of these branches.
+
+Prepare a python environment and install dependencies:
+
+```shell
+conda create -n ov_sd_controlnet python==3.11
+conda activate ov_sd_controlnet
+pip install -r ../../common/detectors/scripts/requirements.txt
+pip install -r scripts/requirements.txt
+```
+
+## Step 2: Convert Models
+
+1. Convert tokenizer
+
+```shell
+pip install openvino-tokenizers
+
+convert_tokenizer openai/clip-vit-large-patch14 --with-detokenizer -o models/tokenizer
+```
+
+2. Convert the rest of models
+
+```shell
+python scripts/convert_sd_controlnet.py
+```
+
+It will download missing models and convert them into openvino models.
+
+## Step 3: Build the Application
+
+On Windows, install OpenVINO and OpenCV first:
+
+- OpenVINO: follow the [archive installation instructions for Windows](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-archive-windows.html).
+- OpenCV: download the latest release (the build requires OpenCV 4.9.0 or newer) and install it somewhere on your PC (for example `D:\opencv\`).
+
+Then open a command prompt (x86 Native Tools Command Prompt for VS 2022) and run:
+
+```shell
+"C:\Program Files (x86)\Intel\openvino_2024\setupvars.bat"
+"D:\opencv\opencv\build\setup_vars_opencv4.cmd"
+
+cmake -S . -B build -DOpenCV_DIR="D:\opencv\opencv\build" -DOpenVINO_DIR="C:\Program Files (x86)\Intel\openvino_2024\runtime\cmake"
+
+cmake --build build --parallel --config Release
+```
+
+## Step 4: Run Pipeline
+
+Use `-h` to list all the options. Here is a simple image-to-image example:
+
+
+```shell
+.\build\Release\stable_diffusion_controlnet.exe -p "Dancing Darth Vader, best quality, extremely detailed" -n "monochrome, lowres, bad anatomy, worst quality, low quality" -i ".\scripts\pose.png" -s 42 --step 20 -d GPU
+```
+
+## Step 5: Verify Results
+
+Run the [notebook](./scripts/verify.ipynb) to compare the results generated by the C++ and Python pipelines.
@@ -0,0 +1,10 @@
+# Anything named "model" (file or directory) at any depth below this file
+model
+# Files ignored by extension
+*.png
+*.bmp
+*.txt
+*.jpg
+*.xml
+*.bin
+# Negations: keep these two files tracked. requirements.txt would otherwise be
+# caught by the *.txt rule above, and so would CMakeLists.txt.
+!requirements.txt
+!CMakeLists.txt
+*.pth
@@ -0,0 +1,52 @@
+# Copyright (C) 2023-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+cmake_minimum_required(VERSION 3.15)
+
+# Build switches. Both default to ON, which reproduces the previous
+# unconditional behaviour; turn them off for portable binaries or when the
+# unit tests (and the GoogleTest download) are not wanted.
+option(DETECTORS_BUILD_TESTS "Build the detectors_test GoogleTest executable" ON)
+option(DETECTORS_NATIVE_ARCH "Compile with -march=native on GCC (binaries are tied to the build machine's CPU)" ON)
+
+# CONFIGURE_DEPENDS makes the build re-check the glob, so sources added to or
+# removed from src/ are picked up without a manual re-configure.
+file(GLOB_RECURSE detectors_SRC CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp")
+
+add_library(detectors STATIC ${detectors_SRC})
+add_library(detectors::detectors ALIAS detectors)
+
+target_include_directories(detectors
+    PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include"
+    PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")
+
+if(CMAKE_COMPILER_IS_GNUCXX)
+    # Warnings concern only this library's own sources.
+    target_compile_options(detectors PRIVATE -Wall)
+    if(DETECTORS_NATIVE_ARCH)
+        # Kept PUBLIC so consumers are compiled with the same flag as before.
+        target_compile_options(detectors PUBLIC -march=native)
+    endif()
+endif()
+
+# dependencies
+
+find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+
+# Another subproject may already have added imwrite; adding it twice is an error.
+if(NOT TARGET imwrite::imwrite)
+    add_subdirectory(../../../common/imwrite _deps/imwrite)
+endif()
+
+# opencv is only for resize now
+find_package(OpenCV 4.9.0 REQUIRED)
+
+# Target-scoped instead of directory-scoped; PUBLIC so the executables below,
+# which link detectors, still see the OpenCV headers.
+target_include_directories(detectors PUBLIC ${OpenCV_INCLUDE_DIRS})
+
+target_compile_features(detectors PRIVATE cxx_std_17)
+target_link_libraries(detectors PUBLIC openvino::runtime imwrite::imwrite ${OpenCV_LIBS})
+
+if(DETECTORS_BUILD_TESTS)
+    include(FetchContent)
+
+    # gtest: pinned to a release tag so the build is reproducible. Tracking the
+    # moving `main` branch can break the build whenever upstream changes.
+    FetchContent_Declare(
+        googletest
+        URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.tar.gz)
+    # For Windows: Prevent overriding the parent project's compiler/linker settings
+    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+    FetchContent_MakeAvailable(googletest)
+
+    # Without enable_testing() the discovered tests are not registered with CTest.
+    enable_testing()
+    include(GoogleTest)
+
+    add_executable(detectors_test detectors_test.cpp)
+    # The test source uses structured bindings; detectors only requests C++17
+    # PRIVATE-ly, so the requirement has to be stated on this target as well.
+    target_compile_features(detectors_test PRIVATE cxx_std_17)
+    target_link_libraries(detectors_test PRIVATE detectors GTest::gtest_main)
+    gtest_discover_tests(detectors_test)
+endif()
+
+# Small command-line front end around the detector.
+add_executable(detectors_bridge detectors_bridge.cpp)
+target_compile_features(detectors_bridge PRIVATE cxx_std_17)
+target_link_libraries(detectors_bridge PRIVATE detectors)
@@ -0,0 +1,10 @@
+## Detectors
+
+`Detectors` is a C++ static library that encapsulates the implementation of the detector from the [controlnet_aux](https://github.com/huggingface/controlnet_aux) library. It is designed to facilitate the integration of ControlNet support within Stable Diffusion.
+
+The library is written to operate with OpenVINO C++ API objects like `ov::Model` and `ov::Tensor` and can be used in deployment scenarios with OpenVINO Runtime on Edge.
+
+### Functionality
+
+The library contains ports of the following detectors:
+- [OpenposeDetector](https://github.com/huggingface/controlnet_aux/blob/6367d57749002a76900a4fc26c06b82b34f495f7/src/controlnet_aux/open_pose/__init__.py#L70C7-L70C23)
@@ -0,0 +1,49 @@
+#include <exception>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <opencv2/opencv.hpp>
+
+#include "imwrite.hpp"
+#include "openpose_detector.hpp"
+#include "utils.hpp"
+
+// Writes a one-line usage summary for this tool to stderr.
+// `program_name` is printed verbatim as the command name.
+void print_usage(const char* program_name) {
+    std::cerr << "Usage: " << program_name;
+    std::cerr << " -i <input_image> -o <output_image> -m <model>";
+    std::cerr << std::endl;
+}
+
+// Command-line entry point: runs the Openpose detector on a single image.
+//
+// Arguments: -i <input_image> -o <output_image> -m <model>, where <model> is
+// the directory that contains `openpose.xml`. All three are mandatory.
+//
+// Returns 0 once the output image has been written, and 1 on bad usage or
+// when loading, inference, or writing throws.
+int main(int argc, char* argv[]) {
+    std::string input_image;
+    std::string output_image;
+    std::string model_path;
+
+    // Each flag consumes the argument that follows it; an unknown flag, or a
+    // flag with no value after it, is a usage error.
+    for (int i = 1; i < argc; ++i) {
+        const std::string arg = argv[i];
+        const bool has_value = i + 1 < argc;
+        if (arg == "-i" && has_value) {
+            input_image = argv[++i];
+        } else if (arg == "-o" && has_value) {
+            output_image = argv[++i];
+        } else if (arg == "-m" && has_value) {
+            model_path = argv[++i];
+        } else {
+            print_usage(argv[0]);
+            return 1;
+        }
+    }
+
+    if (input_image.empty() || output_image.empty() || model_path.empty()) {
+        print_usage(argv[0]);
+        return 1;
+    }
+
+    try {
+        auto ori_img = read_image_to_tensor(input_image);
+
+        std::cout << "Input image tensor shape: " << ori_img.get_shape() << std::endl;
+
+        OpenposeDetector detector;
+        detector.load(model_path + "/openpose.xml");
+
+        // forward, get subset and candidate
+        std::vector<std::vector<float>> subset;
+        std::vector<std::vector<float>> candidate;
+        auto result = detector.forward(ori_img, subset, candidate);
+
+        imwrite(output_image, result, true);
+
+        // Report success only after the image has actually been written.
+        std::cout << "[DONE] result: " << output_image << std::endl;
+    } catch (const std::exception& e) {
+        // Report the failure and exit with an error code instead of letting
+        // the exception terminate the process.
+        std::cerr << "Error: " << e.what() << std::endl;
+        return 1;
+    }
+
+    return 0;
+}
@@ -0,0 +1,101 @@
+#include <gtest/gtest.h>
+
+#include "imwrite.hpp"
+#include "openpose_detector.hpp"
+#include "utils.hpp"
+
+// Smoke test for the image helpers: loads a BGR fixture, then resizes, pads
+// and crops it, writing each intermediate result to a .bmp file.
+// The images are not compared against references here; they are meant to be
+// inspected afterwards.
+TEST(OpenposeDetectorTest, UtilsFunction) {
+    OpenposeDetector detector;
+    detector.load("model/openpose.xml");
+
+    auto input_image = "scripts/im.txt";
+
+    // Set up initial parameters
+    int stride = 8;
+    int pad_val = 128;
+
+    unsigned long H = 768;
+    unsigned long W = 512;
+    unsigned long C = 3;
+
+    // functional tests
+    // Load Image
+    std::cout << "Load " << input_image << std::endl;
+    std::vector<std::uint8_t> im_array = read_bgr_from_txt(input_image);
+
+    ov::Shape img_shape = {1, H, W, C};  // NHWC
+    ov::Tensor img_tensor(ov::element::u8, img_shape);
+
+    // The copy below writes im_array.size() bytes into the tensor buffer, so
+    // a fixture of a different size must stop the test before the copy.
+    ASSERT_EQ(im_array.size(), img_tensor.get_size())
+        << "unexpected number of values read from " << input_image;
+
+    // validate the read function
+    std::uint8_t* tensor_data = img_tensor.data<std::uint8_t>();
+    std::copy(im_array.begin(), im_array.end(), tensor_data);
+    std::cerr << "Tensor shape: " << img_tensor.get_shape() << std::endl;
+    imwrite(std::string("im.bmp"), img_tensor, false);
+
+    // validate the resize function
+    ov::Tensor small_img_tensor = smart_resize_k(img_tensor, 0.5, 0.5);
+    imwrite(std::string("im.half.bmp"), small_img_tensor, false);
+
+    ov::Tensor big_img_tensor = smart_resize_k(img_tensor, 2, 2);
+    imwrite(std::string("im.double.bmp"), big_img_tensor, false);
+
+    // 761 and 505 are not multiples of the stride (8), so the padding helper
+    // is given an image that is not already stride-aligned.
+    ov::Tensor need_pad_img_tensor = smart_resize(img_tensor, 761, 505);
+    auto [img_padded, pad] = pad_right_down_corner(need_pad_img_tensor, stride, pad_val);
+    imwrite(std::string("im.paded.bmp"), img_padded, false);
+
+    auto img_cropped = crop_right_down_corner(img_padded, pad);
+    imwrite(std::string("im.cropped.bmp"), img_cropped, false);
+}
+
+// Runs the full detector on the BGR fixture and dumps the rendered pose image
+// plus the raw `candidate` and `subset` tables to files, so they can be
+// compared with the Python implementation.
+TEST(OpenposeDetectorTest, ForwardFunction) {
+    OpenposeDetector detector;
+    detector.load("model/openpose.xml");
+
+    unsigned long H = 768;
+    unsigned long W = 512;
+    unsigned long C = 3;
+
+    // read image from ndarray
+    auto input_image = "scripts/im.txt";
+    std::vector<std::uint8_t> input_array = read_bgr_from_txt(input_image);
+    ov::Tensor ori_img(ov::element::u8, {1, H, W, C});
+
+    // The copy below writes input_array.size() bytes into the tensor buffer,
+    // so a fixture of a different size must stop the test before the copy.
+    ASSERT_EQ(input_array.size(), ori_img.get_size())
+        << "unexpected number of values read from " << input_image;
+
+    std::uint8_t* input_data = ori_img.data<std::uint8_t>();
+    std::copy(input_array.begin(), input_array.end(), input_data);
+
+    // forward, get subset and candidate
+    std::vector<std::vector<float>> subset;
+    std::vector<std::vector<float>> candidate;
+    auto pos_result = detector.forward(ori_img, subset, candidate);
+    imwrite(std::string("im.result.bmp"), pos_result, true);
+
+    // Every candidate row is indexed up to [3] below; check that up front.
+    for (auto& cand : candidate) {
+        ASSERT_GE(cand.size(), 4u) << "candidate row has fewer than 4 values";
+    }
+
+    // print results
+    for (auto& cand : candidate) {
+        std::cout << "Candidate: " << cand[0] << " " << cand[1] << " " << cand[2] << " " << cand[3] << std::endl;
+    }
+
+    for (auto& sub : subset) {
+        std::cout << "Subset: ";
+        for (auto& s : sub) {
+            std::cout << s << " ";
+        }
+        std::cout << std::endl;
+    }
+
+    // save candidate into a text file
+    std::ofstream out("candidate.txt");
+    ASSERT_TRUE(out.is_open()) << "cannot open candidate.txt for writing";
+    for (auto& cand : candidate) {
+        out << cand[0] << " " << cand[1] << " " << cand[2] << " " << cand[3] << std::endl;
+    }
+    out.close();
+
+    // save subset into a text file
+    out.open("subset.txt");
+    ASSERT_TRUE(out.is_open()) << "cannot open subset.txt for writing";
+    for (auto& sub : subset) {
+        for (auto& s : sub) {
+            out << s << " ";
+        }
+        out << std::endl;
+    }
+
+    // we inspect the results in python
+}
-Original file line number
+Diff line change
@@ -0,0 +1,6 @@
+    *.xml
+    *.bin
+    models/
+    images/
+    *.bmp
+    *.ini