diff --git a/.github/actions/cache/package-lock.json b/.github/actions/cache/package-lock.json index 988190bfa0b294..03d34e378347fd 100644 --- a/.github/actions/cache/package-lock.json +++ b/.github/actions/cache/package-lock.json @@ -23,7 +23,6 @@ "eslint-plugin-jest": "^27.9.0", "fs": "0.0.1-security", "jest": "^29.7.0", - "mock-fs": "^5.2.0", "prettier": "^3.2.5" }, "engines": { @@ -3812,12 +3811,12 @@ } }, "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dev": true, "dependencies": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" }, "engines": { "node": ">=8" @@ -5378,9 +5377,9 @@ } }, "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dev": true, "dependencies": { "to-regex-range": "^5.0.1" @@ -8328,15 +8327,6 @@ "node": ">=10" } }, - "node_modules/mock-fs": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/mock-fs/-/mock-fs-5.2.0.tgz", - "integrity": "sha512-2dF2R6YMSZbpip1V1WHKGLNjr/k48uQClqMVb5H3MOvwc9qhYis3/IWbj02qIg/Y8MDXKFF4c5v0rxx2o6xTZw==", - "dev": true, - "engines": { - "node": ">=12.0.0" - } - }, "node_modules/ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", @@ -9433,9 +9423,9 @@ } }, "node_modules/tar": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/tar/-/tar-6.2.0.tgz", - "integrity": "sha512-/Wo7DcT0u5HUV486xg675HtjNd3BXZ6xDbzsCUZPt5iw8bTQ63bP0Raut3mvro9u+CUyq7YQd8Cx55fsZXxqLQ==", + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/tar/-/tar-6.2.1.tgz", + "integrity": "sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==", "dependencies": { "chownr": "^2.0.0", "fs-minipass": "^2.0.0", @@ -9707,9 +9697,9 @@ } }, "node_modules/undici": { - "version": "5.28.3", - "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.3.tgz", - "integrity": "sha512-3ItfzbrhDlINjaP0duwnNsKpDQk3acHI3gVJ1z4fmwMK31k5G9OVIAMLSIaP6w4FaGkaAkN6zaQO9LUvZ1t7VA==", + "version": "5.28.4", + "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.4.tgz", + "integrity": "sha512-72RFADWFqKmUb2hmmvNODKL3p9hcB6Gt2DOQMis1SEBaV6a4MH8soBvzg+95CYhCKPFedut2JY9bMfrDl9D23g==", "dependencies": { "@fastify/busboy": "^2.0.0" }, diff --git a/.github/dockerfiles/docker_tag b/.github/dockerfiles/docker_tag index 88270be301bc90..ae48310adafe6f 100644 --- a/.github/dockerfiles/docker_tag +++ b/.github/dockerfiles/docker_tag @@ -1 +1 @@ -pr-25107 +pr-25303 diff --git a/.github/dockerfiles/ov_build/fedora_33/Dockerfile b/.github/dockerfiles/ov_build/fedora_33/Dockerfile new file mode 100644 index 00000000000000..fc94c37d67a321 --- /dev/null +++ b/.github/dockerfiles/ov_build/fedora_33/Dockerfile @@ -0,0 +1,23 @@ +FROM openvinogithubactions.azurecr.io/dockerio/library/fedora:33 + +USER root + +RUN yum 
update -y && yum install -y git + +# Install build dependencies +ADD install_build_dependencies.sh /install_build_dependencies.sh +RUN chmod +x /install_build_dependencies.sh && \ + /install_build_dependencies.sh && \ + rm -rf /var/lib/apt/lists/* + +# Install sscache +ARG SCCACHE_VERSION="v0.7.5" +ENV SCCACHE_HOME="/opt/sccache" \ + SCCACHE_PATH="/opt/sccache/sccache" + +RUN mkdir ${SCCACHE_HOME} && cd ${SCCACHE_HOME} && \ + SCCACHE_ARCHIVE="sccache-${SCCACHE_VERSION}-x86_64-unknown-linux-musl.tar.gz" && \ + curl -SLO https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/${SCCACHE_ARCHIVE} && \ + tar -xzf ${SCCACHE_ARCHIVE} --strip-components=1 && rm ${SCCACHE_ARCHIVE} + +ENV PATH="$SCCACHE_HOME:$PATH" diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_android_arm64/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_22_04_android_arm64/Dockerfile new file mode 100644 index 00000000000000..f3d8caa5220f28 --- /dev/null +++ b/.github/dockerfiles/ov_build/ubuntu_22_04_android_arm64/Dockerfile @@ -0,0 +1,55 @@ +FROM openvinogithubactions.azurecr.io/dockerhub/ubuntu:22.04 + +USER root + +# APT configuration +RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ + echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf + +ENV DEBIAN_FRONTEND="noninteractive" \ + TZ="Europe/London" + +RUN apt update && \ + apt install software-properties-common git ca-certificates && \ + add-apt-repository --yes --no-update ppa:git-core/ppa && \ + apt update && \ + apt install \ + scons \ + wget \ + ninja-build \ + build-essential \ + python3-pip && \ + # vcpkg requires cmake 3.19 or later + python3 -m pip install -U pip cmake~=3.28.0 && \ + # vcpkg's tool dependencies + apt install curl zip unzip tar && \ + # vcpkg 'python3' port dependencies + apt install autoconf libtool autoconf-archive && \ + # vcpkg tree of dependencies require extra packages + apt install pkgconf linux-libc-dev && \ + apt --no-install-recommends install default-jdk && \ + rm -rf /var/lib/apt/lists/* + +# Install sscache +ARG SCCACHE_VERSION="v0.7.5" +ENV SCCACHE_HOME="/opt/sccache" \ + SCCACHE_PATH="/opt/sccache/sccache" + +RUN mkdir ${SCCACHE_HOME} && cd ${SCCACHE_HOME} && \ + SCCACHE_ARCHIVE="sccache-${SCCACHE_VERSION}-x86_64-unknown-linux-musl.tar.gz" && \ + curl -SLO https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/${SCCACHE_ARCHIVE} && \ + tar -xzf ${SCCACHE_ARCHIVE} --strip-components=1 && rm ${SCCACHE_ARCHIVE} + +ENV PATH="$SCCACHE_HOME:$PATH" + +# Install Android SDK, NDK and Tools +ENV ANDROID_TOOLS /deps/android_tools +ENV ANDROID_NDK_HOME /deps/android_tools/ndk-bundle +RUN mkdir -p ${ANDROID_NDK_HOME} +ENV ANDROID_SDK_VERSION 29 + +RUN wget https://dl.google.com/android/repository/commandlinetools-linux-7583922_latest.zip && \ + unzip commandlinetools-linux-7583922_latest.zip +RUN echo "yes" | ./cmdline-tools/bin/sdkmanager --sdk_root=${ANDROID_TOOLS} --install "ndk-bundle" "platform-tools" "platforms;android-${ANDROID_SDK_VERSION}" diff --git a/.github/dockerfiles/ov_build/webassembly/Dockerfile b/.github/dockerfiles/ov_build/webassembly/Dockerfile new file mode 100644 index 00000000000000..66765ed9341efe --- /dev/null +++ b/.github/dockerfiles/ov_build/webassembly/Dockerfile @@ -0,0 +1,33 @@ +FROM openvinogithubactions.azurecr.io/dockerio/emscripten/emsdk:3.1.61 + +USER root + +# APT configuration +RUN 
echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ + echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf + +ENV DEBIAN_FRONTEND="noninteractive" \ + TZ="Europe/London" + +RUN apt-get update && \ + apt-get install software-properties-common && \ + add-apt-repository --yes --no-update ppa:git-core/ppa && \ + apt-get update && \ + apt-get install \ + git \ + ca-certificates && \ + rm -rf /var/lib/apt/lists/* + +# Install sscache +ARG SCCACHE_VERSION="v0.7.5" +ENV SCCACHE_HOME="/opt/sccache" \ + SCCACHE_PATH="/opt/sccache/sccache" + +RUN mkdir ${SCCACHE_HOME} && cd ${SCCACHE_HOME} && \ + SCCACHE_ARCHIVE="sccache-${SCCACHE_VERSION}-x86_64-unknown-linux-musl.tar.gz" && \ + curl -SLO https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/${SCCACHE_ARCHIVE} && \ + tar -xzf ${SCCACHE_ARCHIVE} --strip-components=1 && rm ${SCCACHE_ARCHIVE} + +ENV PATH="$SCCACHE_HOME:$PATH" diff --git a/.github/labeler.yml b/.github/labeler.yml index eea70d31684e4c..64a8661cf1e2e8 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -141,6 +141,7 @@ 'category: TF FE': - 'src/frontends/tensorflow/**/*' - 'src/frontends/tensorflow_common/**/*' +- 'src/bindings/python/src/openvino/frontend/tensorflow/**/*' - 'tests/layer_tests/tensorflow_tests/**/*' - 'tests/layer_tests/tensorflow2_keras_tests/**/*' - 'tests/layer_tests/jax_tests/**/*' @@ -163,6 +164,7 @@ 'category: JAX FE': - 'src/frontends/jax/**/*' +- 'src/bindings/python/src/openvino/frontend/jax/**/*' - 'tests/layer_tests/jax_tests/**/*' 'category: tools': diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index 15080460634219..de5b6c0011e34d 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -20,6 +20,7 @@ jobs: runs-on: ubuntu-latest outputs: affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action @@ -40,15 +41,37 @@ jobs: skip_when_only_listed_labels_set: 'docs' skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg,*/layer_tests_summary/*,*/conformance/*' - Build: + Docker: needs: Smart_CI + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/ubuntu_22_04_android_arm64 + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: '.github/dockerfiles' + changed_components: ${{ needs.smart_ci.outputs.changed_components }} + + Build: + needs: [Smart_CI, Docker] timeout-minutes: 150 defaults: run: shell: bash runs-on: aks-linux-16-cores-32gb container: - image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_android_arm64 }} volumes: - /mount:/mount options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING @@ -64,8 +87,8 @@ jobs: OPENVINO_REPO: '/__w/openvino/openvino/openvino' VCPKG_ROOT: '/__w/openvino/openvino/vcpkg' BUILD_DIR: 
'/__w/openvino/openvino/build' - ANDROID_TOOLS: '/__w/openvino/openvino/android_tools' - ANDROID_NDK_HOME: '/__w/openvino/openvino/android_tools/ndk-bundle' + ANDROID_TOOLS: '/deps/android_tools' + ANDROID_NDK_HOME: '/deps/android_tools/ndk-bundle' ANDROID_SDK_VERSION: 29 ANDROID_ABI_CONFIG: arm64-v8a VCPKG_DEFAULT_BINARY_CACHE: '/mount/caches/ccache/android_arm64/vcpkg_cache' @@ -73,9 +96,6 @@ jobs: SCCACHE_AZURE_KEY_PREFIX: android_arm64 if: "!needs.smart_ci.outputs.skip_workflow" steps: - - name: Install git - run: apt-get update && apt-get install --assume-yes --no-install-recommends git ca-certificates - - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: @@ -107,35 +127,6 @@ jobs: - name: System info uses: ./openvino/.github/actions/system_info - # - # Dependencies - # - - - name: Install dependencies - run: | - # generic dependencies - apt --assume-yes install ccache scons ninja-build build-essential python3-pip - - # vcpkg requires cmake 3.19 or later - python3 -m pip install -U pip cmake~=3.28.0 - # vcpkg's tool dependencies - apt --assume-yes install curl zip unzip tar - # vcpkg 'python3' port dependencies - apt --assume-yes install autoconf libtool autoconf-archive - # vcpkg tree of dependencies require extra packages - apt --assume-yes install pkgconf linux-libc-dev - - # Install Android SDK, NDK and Tools - apt -y --no-install-recommends install unzip wget default-jdk - wget https://dl.google.com/android/repository/commandlinetools-linux-7583922_latest.zip - unzip commandlinetools-linux-7583922_latest.zip - echo "yes" | ./cmdline-tools/bin/sdkmanager --sdk_root=${ANDROID_TOOLS} --install "ndk-bundle" "platform-tools" "platforms;android-${{ env.ANDROID_SDK_VERSION }}" - - - name: Install sccache - uses: mozilla-actions/sccache-action@89e9040de88b577a072e3760aaf59f585da083af # v0.0.5 - with: - version: "v0.7.5" - # # Build # diff --git a/.github/workflows/fedora.yml b/.github/workflows/fedora.yml index 5835815e0d9e39..02cd0abf018319 100644 --- a/.github/workflows/fedora.yml +++ b/.github/workflows/fedora.yml @@ -20,6 +20,7 @@ jobs: runs-on: ubuntu-latest outputs: affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action @@ -40,15 +41,42 @@ jobs: skip_when_only_listed_labels_set: 'docs' skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg,*/layer_tests_summary/*,*/conformance/*' - Build: + - name: Show affected components + run: | + echo "${{ toJSON(steps.smart_ci.outputs.affected_components) }}" + shell: bash + + Docker: needs: Smart_CI + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/fedora_33 + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: '.github/dockerfiles' + changed_components: ${{ needs.smart_ci.outputs.changed_components }} + + Build: + needs: [Docker, Smart_CI] timeout-minutes: 150 defaults: run: shell: bash runs-on: aks-linux-16-cores-32gb container: - image: fedora:33 + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.fedora_33 }} volumes: - 
/mount:/mount options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING @@ -69,9 +97,6 @@ jobs: SCCACHE_AZURE_KEY_PREFIX: fedora33_x86_64_Release if: "!needs.smart_ci.outputs.skip_workflow" steps: - - name: Install git - run: yum update -y && yum install -y git - - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: @@ -89,14 +114,6 @@ jobs: # Dependencies # - - name: Install build dependencies - run: bash ${OPENVINO_REPO}/install_build_dependencies.sh - - - name: Install sccache - uses: mozilla-actions/sccache-action@89e9040de88b577a072e3760aaf59f585da083af # v0.0.5 - with: - version: "v0.7.5" - - name: Install python dependencies run: | python3 -m pip install -U pip @@ -204,14 +221,14 @@ jobs: if-no-files-found: 'error' RPM_Packages: - needs: Build + needs: [Docker, Build] timeout-minutes: 10 defaults: run: shell: bash - runs-on: ubuntu-20.04 + runs-on: aks-linux-4-cores-16gb container: - image: fedora:33 + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.fedora_33 }} env: RPM_PACKAGES_DIR: /__w/openvino/packages/ diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index bab441e3e27453..4c7a14e891b49e 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -293,6 +293,18 @@ jobs: export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH python3 ${OPENVINO_REPO}/docs/articles_en/assets/snippets/main.py + - name: Python API Tests -- numpy>=2.0.0 + if: ${{ fromJSON(inputs.affected-components).Python_API.test }} + run: | + python3 -m pip uninstall -y numpy + python3 -m pip install "numpy>=2.0.0,<2.1.0" + python3 -m pip install -r ${INSTALL_TEST_DIR}/bindings/python/requirements_test.txt + # for 'template' extension + export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH + python3 -m pytest -sv ${INSTALL_TEST_DIR}/pyopenvino \ + --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ + --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py + - name: Upload Test Results uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 if: ${{ !cancelled() }} diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index 17828576336bfc..c39ec81467eb75 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -135,7 +135,7 @@ jobs: if: always() run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_tests.html --self-contained-html -v -k "not (TestTimmConvertModel or TestTorchHubConvertModel)" + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_tests.html --self-contained-html -v -k "not (TestTimmConvertModel or TestTorchHubConvertModel or test_pa_precommit)" env: TYPE: ${{ inputs.event == 'schedule' && 'nightly' || 'precommit'}} TEST_DEVICE: CPU @@ -146,13 +146,23 @@ jobs: if: always() run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/test_pa_transformation.py -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -v --tb=short + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/test_pa_transformation.py -m ${TYPE} 
--html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -v --tb=short -n 4 env: TYPE: ${{ inputs.event == 'schedule' && 'nightly' || 'precommit'}} TEST_DEVICE: CPU USE_SYSTEM_CACHE: False OP_REPORT_FILE: ${{ env.INSTALL_TEST_DIR }}/TEST-torch_unsupported_ops.log + - name: StatefulToStateless Test + if: always() + run: | + export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/test_stateful_to_stateless_transformation.py -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_stateful_to_stateless_tests.html --self-contained-html -v --tb=short + env: + TYPE: ${{ inputs.event == 'schedule' && 'nightly' || 'precommit'}} + TEST_DEVICE: CPU + USE_SYSTEM_CACHE: False + - name: Reformat unsupported ops file if: '!cancelled()' run: | diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index fbe5b97a1ea520..9cf1acc05e7220 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -92,6 +92,26 @@ jobs: Expand-Archive openvino_package.zip -DestinationPath "${{ env.INSTALL_DIR }}" popd + # + # Dependencies + # + + - name: Install OpenVINO Python wheel (Linux and macOS) + if: runner.os != 'Windows' + run: | + # Find and install wheel + pushd ${INSTALL_DIR}/tools + wheel_name=$(find . -name 'openvino-*.whl') + python3 -m pip install $wheel_name + popd + + - name: Install OpenVINO Python wheel (Windows) + if: runner.os == 'Windows' + run: | + # Find and install wheel + $ovCoreWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\\tools" -Filter openvino-*.whl | % { $_.FullName } + python3 -m pip install "$ovCoreWheelPath" + # # Build # @@ -99,16 +119,15 @@ jobs: - name: Build tokenizers wheel (Linux and macOS) if: runner.os != 'Windows' run: | - source ${INSTALL_DIR}/setupvars.sh - python -m pip wheel -v --no-deps --wheel-dir ${EXTENSION_BUILD_DIR} ${OPENVINO_TOKENIZERS_REPO} + # use OpenVINO wheel package only to build the extension + python -m pip wheel -v --no-deps --wheel-dir ${EXTENSION_BUILD_DIR} --find-links ${INSTALL_DIR}/tools ${OPENVINO_TOKENIZERS_REPO} env: CMAKE_BUILD_PARALLEL_LEVEL: '4' - name: Build tokenizers wheel (Windows) if: runner.os == 'Windows' run: | - . 
"${{ env.INSTALL_DIR }}/setupvars.ps1" - python3 -m pip wheel -v --no-deps --wheel-dir ${env:EXTENSION_BUILD_DIR} ${env:OPENVINO_TOKENIZERS_REPO} + python3 -m pip wheel -v --no-deps --wheel-dir ${env:EXTENSION_BUILD_DIR} --find-links ${env:INSTALL_DIR}/tools ${env:OPENVINO_TOKENIZERS_REPO} env: CMAKE_BUILD_PARALLEL_LEVEL: '4' diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 4358af5707b077..e75cc2a1867e55 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -687,7 +687,7 @@ jobs: Overall_Status: name: ci/gha_overall_status needs: [Smart_CI, Build, Debian_Packages, Samples, Conformance, ONNX_Runtime, CXX_Unit_Tests, Python_Unit_Tests, TensorFlow_Layer_Tests, - CPU_Functional_Tests, TensorFlow_Models_Tests_Precommit, PyTorch_Models_Tests, NVIDIA_Plugin, Openvino_tokenizers, iGPU] + CPU_Functional_Tests, TensorFlow_Models_Tests_Precommit, PyTorch_Models_Tests, NVIDIA_Plugin, Openvino_tokenizers] if: ${{ always() }} runs-on: ubuntu-latest steps: diff --git a/.github/workflows/linux_cpu_dev.yml b/.github/workflows/linux_cpu_dev.yml new file mode 100644 index 00000000000000..94a8d308f54fe3 --- /dev/null +++ b/.github/workflows/linux_cpu_dev.yml @@ -0,0 +1,283 @@ +name: Linux developer workflow for CPU plugin (Ubuntu 20.04) +on: + workflow_dispatch: + pull_request: + paths: + - '.github/workflows/linux_cpu_dev.yml' + - 'src/common/snippets/**' + - 'src/plugins/intel_cpu/src/nodes/subgraph.cpp' + - 'src/plugins/intel_cpu/src/nodes/subgraph.h' + - 'src/plugins/intel_cpu/src/emitters/snippets/**' + - 'src/plugins/intel_cpu/src/emitters/tpp/**' + - 'src/plugins/intel_cpu/src/transformations/snippets/**' + - 'src/plugins/intel_cpu/src/transformations/tpp/**' + +concurrency: + group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-linux-cpu-dev + cancel-in-progress: true + +permissions: read-all + +env: + PIP_CACHE_PATH: /mount/caches/pip/linux + +jobs: + Smart_CI: + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + sparse-checkout: .github/actions/smart-ci + + - name: Get affected components + id: smart_ci + uses: ./.github/actions/smart-ci + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: (.*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg' + + - name: Show affected components + run: | + echo "${{ toJSON(steps.smart_ci.outputs.affected_components) }}" + shell: bash + + Docker: + needs: Smart_CI + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/ubuntu_20_04_x64 + ov_test/ubuntu_20_04_x64 + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: '.github/dockerfiles' + changed_components: ${{ 
needs.smart_ci.outputs.changed_components }} + + Build: + needs: Docker + timeout-minutes: 150 + defaults: + run: + shell: bash + runs-on: aks-linux-16-cores-32gb + container: + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_x64 }} + volumes: + - /mount:/mount + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING + env: + DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input + CMAKE_BUILD_TYPE: 'Release' + CMAKE_GENERATOR: 'Ninja Multi-Config' + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache + SCCACHE_IGNORE_SERVER_IO_ERROR: 1 + SCCACHE_SERVER_PORT: 35555 + SCCACHE_ERROR_LOG: /__w/openvino/sccache_log.txt + SCCACHE_LOG: warn + GITHUB_WORKSPACE: '/__w/openvino/openvino' + OPENVINO_REPO: /__w/openvino/openvino/openvino + INSTALL_DIR: /__w/openvino/openvino/openvino_install + INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install + BUILD_DIR: /__w/openvino/openvino/openvino_build + SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_Release + if: "!needs.smart_ci.outputs.skip_workflow" + + steps: + - name: Clone OpenVINO + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + path: ${{ env.OPENVINO_REPO }} + submodules: 'true' + + - name: System info + uses: ./openvino/.github/actions/system_info + + # + # Build + # + + - name: CMake configure - OpenVINO + run: | + cmake \ + -G "${{ env.CMAKE_GENERATOR }}" \ + -DENABLE_CPPLINT=OFF \ + -DENABLE_NCC_STYLE=OFF \ + -DENABLE_TESTS=ON \ + -DENABLE_SNIPPETS_LIBXSMM_TPP=ON \ + -DENABLE_STRICT_DEPENDENCIES=OFF \ + -DENABLE_SYSTEM_OPENCL=ON \ + -DCMAKE_VERBOSE_MAKEFILE=ON \ + -DCPACK_GENERATOR=TGZ \ + -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ + -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \ + -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \ + -S ${OPENVINO_REPO} \ + -B ${BUILD_DIR} + + - name: Clean sccache stats + run: ${SCCACHE_PATH} --zero-stats + + - name: Cmake build - OpenVINO + run: cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + + - name: Show sccache stats + run: ${SCCACHE_PATH} --show-stats + + - name: Cmake install - OpenVINO + run: | + cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake + cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_TEST_DIR} -DCOMPONENT=tests -P ${BUILD_DIR}/cmake_install.cmake + cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -DCOMPONENT=python_wheels -P ${BUILD_DIR}/cmake_install.cmake + + - name: Pack Artifacts + run: | + + pushd ${INSTALL_DIR} + tar -czvf ${BUILD_DIR}/openvino_package.tar.gz * + popd + + pushd ${INSTALL_TEST_DIR} + tar -czvf ${BUILD_DIR}/openvino_tests.tar.gz * + popd + + # + # Upload build artifacts and logs + # + - name: Upload build logs + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + if: always() + with: + name: build_logs + path: ${{ env.SCCACHE_ERROR_LOG }} + if-no-files-found: 'ignore' + + - name: Upload openvino package + if: ${{ always() }} + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + with: + name: openvino_package + path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz + if-no-files-found: 'error' + + - name: Upload openvino tests package + if: ${{ always() }} + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + with: + name: openvino_tests + path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz + if-no-files-found: 'error' + + CPU_Functional_Tests: + name: CPU functional tests + # WA: currently, 
snippet tests are expectedly failing, we need green CI until all testcases will be fixed + if: ${{ github.event_name == 'workflow_dispatch'}} + needs: [ Docker, Build, Smart_CI ] + timeout-minutes: 30 + runs-on: aks-linux-8-cores-32gb + container: + image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_x64 }} + defaults: + run: + shell: bash + env: + DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + PARALLEL_TEST_SCRIPT: ${{ github.workspace }}/install/tests/functional_test_utils/layer_tests_summary/run_parallel.py + steps: + - name: Download OpenVINO package + uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + with: + name: openvino_package + path: ${{ env.INSTALL_DIR }} + + - name: Download OpenVINO tests package + uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + with: + name: openvino_tests + path: ${{ env.INSTALL_TEST_DIR }} + + # Needed as ${{ github.workspace }} is not working correctly when using Docker + - name: Setup Variables + run: | + echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" + echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" + echo "PARALLEL_TEST_SCRIPT=$GITHUB_WORKSPACE/install/tests/functional_test_utils/layer_tests_summary/run_parallel.py" >> "$GITHUB_ENV" + + - name: Extract OpenVINO packages + run: | + pushd $INSTALL_DIR + tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + popd + + pushd $INSTALL_TEST_DIR + tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + popd + + - name: Fetch setup_python action + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python 3.11 + uses: ./openvino/.github/actions/setup_python + with: + version: '3.11' + should-setup-pip-paths: 'false' + self-hosted-runner: ${{ runner.os == 'Linux' }} + + - name: Install python dependencies for run_parallel.py + run: python3 -m pip install -r ${INSTALL_TEST_DIR}/functional_test_utils/layer_tests_summary/requirements.txt + + - name: Intel CPU plugin func tests (parallel) + run: | + # Needed as the Linux CC does not require setupvars to work + if [[ -f "${INSTALL_DIR}/setupvars.sh" ]]; then + source ${INSTALL_DIR}/setupvars.sh + fi + # Needed as ze_loader.so is under INSTALL_TEST_DIR + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${INSTALL_TEST_DIR} + + python3 ${PARALLEL_TEST_SCRIPT} -e ${INSTALL_TEST_DIR}/ov_cpu_func_tests -w ${INSTALL_TEST_DIR} -s suite -rf 0 -- --gtest_filter=*smoke_Snippets* --gtest_print_time=1 + timeout-minutes: 25 + + - name: Upload Test Results + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + if: always() + with: + name: test-results-functional-cpu + path: | + ${{ env.INSTALL_TEST_DIR }}/temp/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/failed/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/crashed/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/hanged/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/interapted/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/hash_table.csv + if-no-files-found: 'error' diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index 79d2e740261161..6f089f205d3b1d 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ 
-41,8 +41,8 @@ jobs: # SANITIZER_CMAKE_OPTION: '-DENABLE_THREAD_SANITIZER=ON' env: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input - CMAKE_BUILD_TYPE: 'Release' - CMAKE_GENERATOR: 'Ninja Multi-Config' + CMAKE_BUILD_TYPE: 'RelWithDebInfo' + CMAKE_GENERATOR: 'Ninja' GITHUB_WORKSPACE: '/__w/openvino/openvino' OPENVINO_REPO: /__w/openvino/openvino/openvino OPENVINO_CONTRIB_REPO: /__w/openvino/openvino/openvino_contrib @@ -159,9 +159,9 @@ jobs: - name: Cmake install - OpenVINO run: | - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_TEST_DIR} -DCOMPONENT=tests -P ${BUILD_DIR}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -DCOMPONENT=python_wheels -P ${BUILD_DIR}/cmake_install.cmake + cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake --config ${{ env.CMAKE_BUILD_TYPE }} + cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_TEST_DIR} -DCOMPONENT=tests -P ${BUILD_DIR}/cmake_install.cmake --config ${{ env.CMAKE_BUILD_TYPE }} + cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -DCOMPONENT=python_wheels -P ${BUILD_DIR}/cmake_install.cmake --config ${{ env.CMAKE_BUILD_TYPE }} - name: Remove unused files to free space run: rm -rf ${BUILD_DIR}/* @@ -286,7 +286,7 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVCoreUT.xml - name: OpenVINO Inference Functional Tests - if: ${{ 'false' }} # Ticket: 134410 + if: always() run: | source ${INSTALL_DIR}/setupvars.sh diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index d044bcd0fad4c8..32f5474d14ce76 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -4,25 +4,25 @@ on: schedule: # at 00:00 on workdays - cron: '0 0 * * 1,2,3,4,5' -# pull_request: -# paths-ignore: -# - '**/docs/**' -# - 'docs/**' -# - '**/**.md' -# - '**.md' -# - '**/layer_tests_summary/**' -# - '**/conformance/**' -# push: -# paths-ignore: -# - '**/docs/**' -# - 'docs/**' -# - '**/**.md' -# - '**.md' -# - '**/layer_tests_summary/**' -# - '**/conformance/**' -# branches: -# - master -# - 'releases/**' + #pull_request: + # paths-ignore: + # - '**/docs/**' + # - 'docs/**' + # - '**/**.md' + # - '**.md' + # - '**/layer_tests_summary/**' + # - '**/conformance/**' + #push: + # paths-ignore: + # - '**/docs/**' + # - 'docs/**' + # - '**/**.md' + # - '**.md' + # - '**/layer_tests_summary/**' + # - '**/conformance/**' + # branches: + # - master + # - 'releases/**' concurrency: # github.ref is not unique in post-commit diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index c9a9fceb2f8b41..26eb440eb87cb2 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -4,25 +4,25 @@ on: schedule: # at 00:00 on workdays - cron: '0 0 * * 1,2,3,4,5' -# pull_request: -# paths-ignore: -# - '**/docs/**' -# - 'docs/**' -# - '**/**.md' -# - '**.md' -# - '**/layer_tests_summary/**' -# - '**/conformance/**' -# push: -# paths-ignore: -# - '**/docs/**' -# - 'docs/**' -# - '**/**.md' -# - '**.md' -# - '**/layer_tests_summary/**' -# - '**/conformance/**' -# branches: -# - master -# - 'releases/**' + #pull_request: + # paths-ignore: + # - '**/docs/**' + # - 'docs/**' + # - '**/**.md' + # - '**.md' + # - '**/layer_tests_summary/**' + # - '**/conformance/**' + #push: + # paths-ignore: + # - '**/docs/**' + # - 'docs/**' + # - '**/**.md' + # - '**.md' + # - '**/layer_tests_summary/**' + # - '**/conformance/**' + # branches: + # - master + # - 'releases/**' concurrency: # github.ref is not unique in 
post-commit diff --git a/.github/workflows/webassembly.yml b/.github/workflows/webassembly.yml index 469ccda02f6944..902fb0dfcb00f0 100644 --- a/.github/workflows/webassembly.yml +++ b/.github/workflows/webassembly.yml @@ -20,6 +20,7 @@ jobs: runs-on: ubuntu-latest outputs: affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action @@ -40,14 +41,41 @@ jobs: skip_when_only_listed_labels_set: 'docs' skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg,*/layer_tests_summary/*,*/conformance/*' - Build: + - name: Show affected components + run: | + echo "${{ toJSON(steps.smart_ci.outputs.affected_components) }}" + shell: bash + + Docker: needs: Smart_CI + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/webassembly + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: '.github/dockerfiles' + changed_components: ${{ needs.smart_ci.outputs.changed_components }} + + Build: + needs: [Docker, Smart_CI] defaults: run: shell: bash runs-on: aks-linux-16-cores-32gb container: - image: emscripten/emsdk + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.webassembly }} volumes: - /mount:/mount options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING @@ -62,20 +90,12 @@ jobs: SCCACHE_AZURE_KEY_PREFIX: webassembly_Release if: "!needs.smart_ci.outputs.skip_workflow" steps: - - name: Install git - run: apt-get update && apt-get install --assume-yes --no-install-recommends git ca-certificates - - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: path: 'openvino' submodules: 'true' - - name: Install sccache - uses: mozilla-actions/sccache-action@89e9040de88b577a072e3760aaf59f585da083af # v0.0.5 - with: - version: "v0.7.5" - - name: emcmake cmake - configure run: | emcmake cmake \ diff --git a/.gitmodules b/.gitmodules index 5f6560797c69b7..df0401a04ca44f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -84,3 +84,6 @@ [submodule "src/plugins/intel_cpu/thirdparty/libxsmm"] path = src/plugins/intel_cpu/thirdparty/libxsmm url = https://github.com/libxsmm/libxsmm.git +[submodule "src/plugins/intel_cpu/thirdparty/shl"] + path = src/plugins/intel_cpu/thirdparty/shl + url = https://github.com/openvinotoolkit/shl.git diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 219d464682b016..6edda8136b338f 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -104,10 +104,10 @@ function(ov_download_tbb) elseif(LINUX AND X86_64 AND OPENVINO_GNU_LIBC AND OV_LIBC_VERSION VERSION_GREATER_EQUAL 2.17) # build oneTBB 2021.2.1 with gcc 4.8 (glibc 2.17) RESOLVE_DEPENDENCY(TBB - ARCHIVE_LIN "oneapi-tbb-2021.2.5-lin-trim.tgz" + ARCHIVE_LIN "oneapi-tbb-2021.2.4-lin.tgz" TARGET_PATH "${TEMP}/tbb" ENVIRONMENT "TBBROOT" - SHA256 "9bea2c838df3085d292989d643523dc1cedce9b46d5a03eec90104151b49a180" + SHA256 "6523661559a340e88131472ea9a595582c306af083e55293b7357d11b8015546" USE_NEW_LOCATION TRUE) elseif(YOCTO_AARCH64) RESOLVE_DEPENDENCY(TBB 
diff --git a/cmake/developer_package/api_validator/api_validator.cmake b/cmake/developer_package/api_validator/api_validator.cmake index 68e6d1fccb5e7d..ff6b0fbaa061e9 100644 --- a/cmake/developer_package/api_validator/api_validator.cmake +++ b/cmake/developer_package/api_validator/api_validator.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -function(ov_search_api_validator) +macro(ov_search_api_validator) if(NOT ENABLE_API_VALIDATOR) return() endif() @@ -15,8 +15,6 @@ function(ov_search_api_validator) string(REPLACE "\\" "" WINDOWS_SDK_VERSION $ENV{WindowsSDKVersion}) set(CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION ${WINDOWS_SDK_VERSION}) message(STATUS "Use ${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION} Windows SDK version") - # set to parent scope as well for later usage in '_ov_add_api_validator_post_build_step' - set(CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION ${WINDOWS_SDK_VERSION} PARENT_SCOPE) else() message(FATAL_ERROR "WindowsSDKVersion environment variable is not set,\ can't find Windows SDK version. Try to use vcvarsall.bat script") @@ -47,9 +45,12 @@ can't find Windows SDK version. Try to use vcvarsall.bat script") message(STATUS "Found apivalidator: ${ONECORE_API_VALIDATOR}") endif() endif() -endfunction() +endmacro() + -ov_search_api_validator() +if(ENABLE_API_VALIDATOR) + ov_search_api_validator() +endif() function(_ov_add_api_validator_post_build_step_recursive) cmake_parse_arguments(API_VALIDATOR "" "TARGET" "" ${ARGN}) diff --git a/cmake/developer_package/compile_flags/os_flags.cmake b/cmake/developer_package/compile_flags/os_flags.cmake index f846d8a0050afc..a49dce9bce7b50 100644 --- a/cmake/developer_package/compile_flags/os_flags.cmake +++ b/cmake/developer_package/compile_flags/os_flags.cmake @@ -75,11 +75,10 @@ macro(ov_dev_package_no_errors) endif() endif() - if (CMAKE_COMPILE_WARNING_AS_ERROR AND WIN32) + if(CMAKE_COMPILE_WARNING_AS_ERROR AND WIN32) if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") if(CMAKE_VERSION VERSION_LESS 3.24) - string(REPLACE "/WX" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - string(REPLACE "/WX" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + ov_add_compiler_flags(/WX-) endif() string(REPLACE "/WX" "" CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") endif() diff --git a/cmake/developer_package/packaging/archive.cmake b/cmake/developer_package/packaging/archive.cmake index d4e9c65096d245..853b7649e88ddf 100644 --- a/cmake/developer_package/packaging/archive.cmake +++ b/cmake/developer_package/packaging/archive.cmake @@ -94,6 +94,8 @@ macro(ov_define_component_include_rules) set(OV_CPACK_COMP_PKG_CONFIG_EXCLUDE_ALL ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL}) # symbolic links set(OV_CPACK_COMP_LINKS_EXCLUDE_ALL ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL}) + # npu internal tools + unset(OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL) endmacro() ov_define_component_include_rules() diff --git a/cmake/developer_package/packaging/common-libraries.cmake b/cmake/developer_package/packaging/common-libraries.cmake index 247f107b83b6fc..4ec96dc28b53e8 100644 --- a/cmake/developer_package/packaging/common-libraries.cmake +++ b/cmake/developer_package/packaging/common-libraries.cmake @@ -111,6 +111,8 @@ macro(ov_define_component_include_rules) set(OV_CPACK_COMP_PKG_CONFIG_EXCLUDE_ALL ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL}) # symbolic links set(OV_CPACK_COMP_LINKS_EXCLUDE_ALL ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL}) + # npu internal tools + set(OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL EXCLUDE_FROM_ALL) endmacro() ov_define_component_include_rules() diff --git 
a/cmake/developer_package/packaging/debian/debian.cmake b/cmake/developer_package/packaging/debian/debian.cmake index 1b29dd7697d1c7..c7f49419111cea 100644 --- a/cmake/developer_package/packaging/debian/debian.cmake +++ b/cmake/developer_package/packaging/debian/debian.cmake @@ -118,6 +118,8 @@ macro(ov_define_component_include_rules) set(OV_CPACK_COMP_PKG_CONFIG_EXCLUDE_ALL ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL}) # symbolic links set(OV_CPACK_COMP_LINKS_EXCLUDE_ALL ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL}) + # npu internal tools + set(OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL EXCLUDE_FROM_ALL) endmacro() ov_define_component_include_rules() diff --git a/cmake/developer_package/packaging/npm.cmake b/cmake/developer_package/packaging/npm.cmake index 996b55e0ba032f..24453965125348 100644 --- a/cmake/developer_package/packaging/npm.cmake +++ b/cmake/developer_package/packaging/npm.cmake @@ -85,6 +85,8 @@ macro(ov_define_component_include_rules) unset(OV_CPACK_COMP_PKG_CONFIG_EXCLUDE_ALL) # symbolic links unset(OV_CPACK_COMP_LINKS_EXCLUDE_ALL) + # npu internal tools + set(OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL EXCLUDE_FROM_ALL) endmacro() ov_define_component_include_rules() diff --git a/cmake/developer_package/packaging/nsis.cmake b/cmake/developer_package/packaging/nsis.cmake index b84569e4f0b1d1..f5f9a233e8b87f 100644 --- a/cmake/developer_package/packaging/nsis.cmake +++ b/cmake/developer_package/packaging/nsis.cmake @@ -140,6 +140,8 @@ macro(ov_define_component_include_rules) set(OV_CPACK_COMP_PKG_CONFIG_EXCLUDE_ALL ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL}) # symbolic links set(OV_CPACK_COMP_LINKS_EXCLUDE_ALL ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL}) + # npu internal tools + set(OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL EXCLUDE_FROM_ALL) endmacro() ov_define_component_include_rules() diff --git a/cmake/developer_package/packaging/rpm/rpm.cmake b/cmake/developer_package/packaging/rpm/rpm.cmake index 56a0a12647079c..7c9fb4f22a372d 100644 --- a/cmake/developer_package/packaging/rpm/rpm.cmake +++ b/cmake/developer_package/packaging/rpm/rpm.cmake @@ -109,6 +109,8 @@ macro(ov_define_component_include_rules) set(OV_CPACK_COMP_PKG_CONFIG_EXCLUDE_ALL ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL}) # symbolic links set(OV_CPACK_COMP_LINKS_EXCLUDE_ALL ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL}) + # npu internal tools + set(OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL EXCLUDE_FROM_ALL) endmacro() ov_define_component_include_rules() diff --git a/cmake/packaging/debian.cmake b/cmake/packaging/debian.cmake index b97f8eea67c8a6..ddccae48778c21 100644 --- a/cmake/packaging/debian.cmake +++ b/cmake/packaging/debian.cmake @@ -94,6 +94,7 @@ macro(ov_cpack_settings) 2024.0.0 2024.1.0 2024.2.0 + 2024.3.0 ) ov_check_conflicts_versions(conflicting_versions) diff --git a/cmake/packaging/rpm.cmake b/cmake/packaging/rpm.cmake index 4d06ebff496421..e8132b5bba6043 100644 --- a/cmake/packaging/rpm.cmake +++ b/cmake/packaging/rpm.cmake @@ -82,6 +82,7 @@ macro(ov_cpack_settings) 2024.0.0 2024.1.0 2024.2.0 + 2024.3.0 ) ov_check_conflicts_versions(conflicting_versions) diff --git a/cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake b/cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake new file mode 100644 index 00000000000000..eaf5bf4e7c6bc3 --- /dev/null +++ b/cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake @@ -0,0 +1,46 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# NOTE: use T-Head compiler: +# git clone https://github.com/T-head-Semi/xuantie-gnu-toolchain.git +# ./configure 
--prefix=/opt/riscv +# make linux +# -DRISCV_TOOLCHAIN_ROOT=/opt/riscv + +# To enable cross-compilation with python (for example, on Ubuntu 22.04): +# $ echo deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy main >> riscv64-sources.list +# $ echo deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy universe >> riscv64-sources.list +# $ echo deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main >> riscv64-sources.list +# $ echo deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-security main >> riscv64-sources.list +# $ mv riscv64-sources.list /etc/apt/sources.list.d/ +# $ dpkg --add-architecture riscv64 +# $ apt-get update -o Dir::Etc::sourcelist=/etc/apt/sources.list.d/riscv64-sources.list +# $ apt-get install -y --no-install-recommends libpython3-dev:riscv64 +# $ ln -s /usr/include/riscv64-linux-gnu/ /usr/include/python3.10/ + +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR riscv64) + +set(RISCV64_THEAD ON) +set(RISCV64_RVV0p7 ON) + +set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to CLANG for RISC-V cross compiler build directory") +set(CMAKE_SYSROOT "${RISCV_TOOLCHAIN_ROOT}/sysroot" CACHE PATH "RISC-V sysroot") + +set(CMAKE_C_COMPILER ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-g++) +set(CMAKE_STRIP ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-strip) +set(PKG_CONFIG_EXECUTABLE "NOT-FOUND" CACHE PATH "Path to RISC-V pkg-config") + +# Don't run the linker on compiler check +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + +set(CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS_INIT} -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d") +set(CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT} -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d") + +# system libc provides pthread functions (as detected by FindThreads.cmake), but not all functions are available +# WA: use pthread explicitly, since we know it's available in current toolchain +set(CMAKE_EXE_LINKER_FLAGS_INIT "-pthread") +set(CMAKE_MODULE_LINKER_FLAGS_INIT "-pthread") +set(CMAKE_SHARED_LINKER_FLAGS_INIT "-pthread") diff --git a/docs/articles_en/about-openvino.rst b/docs/articles_en/about-openvino.rst index 3d472fb55d4853..a9b599960d2e2b 100644 --- a/docs/articles_en/about-openvino.rst +++ b/docs/articles_en/about-openvino.rst @@ -11,7 +11,6 @@ About OpenVINO about-openvino/performance-benchmarks about-openvino/compatibility-and-support Release Notes - Additional Resources OpenVINO is a toolkit for simple and efficient deployment of various deep learning models. In this section you will find information on the product itself, as well as the software diff --git a/docs/articles_en/about-openvino/additional-resources.rst b/docs/articles_en/about-openvino/additional-resources.rst deleted file mode 100644 index cb8d0fc62f244a..00000000000000 --- a/docs/articles_en/about-openvino/additional-resources.rst +++ /dev/null @@ -1,34 +0,0 @@ -.. {#resources} - -Additional Resources -==================== - - - -.. meta:: - :description: Learn more about OpenVINO from benchmark results, case studies - and lists of supported models, operations and devices. - -.. toctree:: - :maxdepth: 1 - :hidden: - - additional-resources/glossary - Legal and Responsible AI Information <./additional-resources/legal-information> - additional-resources/telemetry - Case Studies - - -:doc:`Performance Benchmarks ` contain results from benchmarking models with OpenVINO on Intel hardware. 
- -:doc:`Glossary ` contains terms used in OpenVINO. - -:doc:`Legal and Responsible AI Information ` provides trademark information and other legal statements. - -:doc:`OpenVINO™ Telemetry ` has detailed information on the telemetry data collection. - -`Case Studies `__ are articles about real-world examples of OpenVINO™ usage. - - - - diff --git a/docs/articles_en/about-openvino/additional-resources/glossary.rst b/docs/articles_en/about-openvino/additional-resources/glossary.rst index eaaf9965d3b583..9aba2b395525c2 100644 --- a/docs/articles_en/about-openvino/additional-resources/glossary.rst +++ b/docs/articles_en/about-openvino/additional-resources/glossary.rst @@ -1,4 +1,4 @@ -.. {#openvino_docs_OV_Glossary} +:orphan: Glossary ======== diff --git a/docs/articles_en/about-openvino/additional-resources/telemetry.rst b/docs/articles_en/about-openvino/additional-resources/telemetry.rst index 34411abd26b224..1e4b3b34123163 100644 --- a/docs/articles_en/about-openvino/additional-resources/telemetry.rst +++ b/docs/articles_en/about-openvino/additional-resources/telemetry.rst @@ -1,4 +1,4 @@ -.. {#openvino_docs_telemetry_information} +:orphan: OpenVINO™ Telemetry ===================== @@ -10,9 +10,9 @@ OpenVINO™ Telemetry To facilitate debugging and further development, OpenVINO™ collects anonymous telemetry data. Anonymous telemetry data is collected by default, but you can stop data collection anytime by running the command ``opt_in_out --opt_out``. -It does not extend to any other Intel software, hardware, website usage, or other products. +It does not extend to any other Intel software, hardware, website usage, or other products. -Google Analytics is used for telemetry purposes. Refer to +Google Analytics is used for telemetry purposes. Refer to `Google Analytics support `__ to understand how the data is collected and processed. Enable or disable Telemetry reporting @@ -21,7 +21,7 @@ Enable or disable Telemetry reporting Changing consent decision +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -You can change your data collection decision with the following command lines: +You can change your data collection decision with the following command lines: ``opt_in_out --opt_in`` - enable telemetry @@ -35,26 +35,26 @@ Telemetry Data Collection Details .. tab-item:: Telemetry Data Collected :sync: telemetry-data-collected - - * Failure reports - * Error reports - * Usage data - + + * Failure reports + * Error reports + * Usage data + .. tab-item:: Tools Collecting Data :sync: tools-collecting-data - - * Model conversion API - * Model Downloader - * Accuracy Checker - * Post-Training Optimization Toolkit + + * Model conversion API + * Model Downloader + * Accuracy Checker + * Post-Training Optimization Toolkit * Neural Network Compression Framework * Model Converter * Model Quantizer - + .. tab-item:: Telemetry Data Retention :sync: telemetry-data-retention - + Telemetry data is retained in Google Analytics for a maximum of 14 months. - Any raw data that has reached the 14-month threshold is deleted from Google Analytics on a monthly basis. + Any raw data that has reached the 14-month threshold is deleted from Google Analytics on a monthly basis. 
diff --git a/docs/articles_en/about-openvino/additional-resources/legal-information.rst b/docs/articles_en/about-openvino/additional-resources/terms-of-use.rst similarity index 67% rename from docs/articles_en/about-openvino/additional-resources/legal-information.rst rename to docs/articles_en/about-openvino/additional-resources/terms-of-use.rst index 128bc8479e52d5..afdf10aef06c5c 100644 --- a/docs/articles_en/about-openvino/additional-resources/legal-information.rst +++ b/docs/articles_en/about-openvino/additional-resources/terms-of-use.rst @@ -1,30 +1,25 @@ -.. {#openvino_docs_Legal_Information} +:orphan: -Legal and Responsible AI Information +Terms of Use ===================================== - .. meta:: - :description: Learn about legal information and policies related to the use - of Intel® Distribution of OpenVINO™ toolkit. + :description: Learn about legal information and policies related to the information + published in OpenVINO™ documentation. -Performance varies by use, configuration and other factors. Learn more at -`www.intel.com/PerformanceIndex `__. -Performance results are based on testing as of dates shown in configurations and may not -reflect all publicly available updates. See backup for configuration details. No product or -component can be absolutely secure. +Intel Global Human Right Principles +########################################################### -Your costs and results may vary. +Intel is committed to respecting human rights and avoiding causing or contributing to adverse +impacts on human rights. See +`Intel's Global Human Rights Principles `__. +Intel's products and software are intended only to be used in applications that do not cause or +contribute to adverse impacts on human rights. -Intel technologies may require enabled hardware, software or service activation. -OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos. -© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel -Corporation or its subsidiaries. Other names and brands may be claimed as the property of -others. OpenVINO™ Logo ########################################################### @@ -33,25 +28,36 @@ To build equity around the project, the OpenVINO logo was created for both Intel usage. The logo may only be used to represent the OpenVINO toolkit and offerings built using the OpenVINO toolkit. -Logo Usage Guidelines -########################################################### - The OpenVINO logo must be used in connection with truthful, non-misleading references to the OpenVINO toolkit, and for no other purpose. Modification of the logo or use of any separate element(s) of the logo alone is not allowed. -Intel Global Human Right Principles -########################################################### -Intel is committed to respecting human rights and avoiding causing or contributing to adverse -impacts on human rights. See `Intel's Global Human Rights Principles `__. -Intel's products and software are intended only to be used in applications that do not cause or -contribute to adverse impacts on human rights. + Model Card Statement ########################################################### -We recommend that users, wherever you are sourcing the model from, should check for a model card, +We recommend that, wherever you are sourcing the model from, you should check for a model card, consult the model card for each model you access and use, and create one if you are developing or updating a model. 
A model card is a short document that provides key information to assess -performance and validation and ensure appropriate use. \ No newline at end of file +performance and validation and ensure appropriate use. + + +Performance claims +########################################################### + +Performance varies by use, configuration and other factors. Learn more at +`www.intel.com/PerformanceIndex `__. + +Performance results are based on testing as of dates shown in configurations and may not +reflect all publicly available updates. + +Your costs and results may vary. + + +No product or component can be absolutely secure. + +Intel technologies may require enabled hardware, software or service activation. + +OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos. \ No newline at end of file diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst index fbb3b948165dd2..890d48bcf75502 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst @@ -1,8 +1,5 @@ -.. {#openvino_supported_devices} - - -Inference Device Support -======================== +Supported Inference Devices +============================ .. meta:: :description: Check the list of devices used by OpenVINO to run inference @@ -26,7 +23,7 @@ OpenVINO offers the option of running automated inference with the following inf multiple devices at the same time. * :doc:`Heterogeneous Inference <../../openvino-workflow/running-inference/inference-devices-and-modes/hetero-execution>` - enables splitting inference among several devices automatically, for example, if one device doesn't support certain operations. -* :doc:`Multi-device Inference <../../openvino-workflow/running-inference/inference-devices-and-modes/multi-device>` - executes inference on multiple devices. +* :doc:`(LEGACY) Multi-device Inference <./../../documentation/legacy-features/multi-device>` - executes inference on multiple devices. Currently, this mode is considered a legacy solution. Using Automatic Device Selection is advised. * :doc:`Automatic Batching <../../openvino-workflow/running-inference/inference-devices-and-modes/automatic-batching>` - automatically groups inference requests to improve device utilization. 
@@ -36,20 +33,20 @@ OpenVINO offers the option of running automated inference with the following inf Feature Support and API Coverage ################################# -=============================================================================================================================== ======= ========== =========== - Supported Feature CPU GPU NPU -=============================================================================================================================== ======= ========== =========== - :doc:`Heterogeneous execution <../../openvino-workflow/running-inference/inference-devices-and-modes/hetero-execution>` Yes Yes No - :doc:`Multi-device execution <../../openvino-workflow/running-inference/inference-devices-and-modes/multi-device>` Yes Yes Partial - :doc:`Automatic batching <../../openvino-workflow/running-inference/inference-devices-and-modes/automatic-batching>` No Yes No - :doc:`Multi-stream execution <../../openvino-workflow/running-inference/optimize-inference/optimizing-throughput>` Yes Yes No - :doc:`Models caching <../../openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview>` Yes Partial Yes - :doc:`Dynamic shapes <../../openvino-workflow/running-inference/dynamic-shapes>` Yes Partial No - :doc:`Import/Export <../../documentation/openvino-ecosystem>` Yes Yes Yes - :doc:`Preprocessing acceleration <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing>` Yes Yes No - :doc:`Stateful models <../../openvino-workflow/running-inference/stateful-models>` Yes Yes Yes - :doc:`Extensibility <../../documentation/openvino-extensibility>` Yes Yes No -=============================================================================================================================== ======= ========== =========== +======================================================================================================================================== ======= ========== =========== + Supported Feature CPU GPU NPU +======================================================================================================================================== ======= ========== =========== + :doc:`Heterogeneous execution <../../openvino-workflow/running-inference/inference-devices-and-modes/hetero-execution>` Yes Yes No + :doc:`(LEGACY) Multi-device execution <./../../documentation/legacy-features/multi-device>` Yes Yes Partial + :doc:`Automatic batching <../../openvino-workflow/running-inference/inference-devices-and-modes/automatic-batching>` No Yes No + :doc:`Multi-stream execution <../../openvino-workflow/running-inference/optimize-inference/optimizing-throughput>` Yes Yes No + :doc:`Models caching <../../openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview>` Yes Partial Yes + :doc:`Dynamic shapes <../../openvino-workflow/running-inference/dynamic-shapes>` Yes Partial No + :doc:`Import/Export <../../documentation/openvino-ecosystem>` Yes Yes Yes + :doc:`Preprocessing acceleration <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing>` Yes Yes No + :doc:`Stateful models <../../openvino-workflow/running-inference/stateful-models>` Yes Yes Yes + :doc:`Extensibility <../../documentation/openvino-extensibility>` Yes Yes No +======================================================================================================================================== ======= ========== =========== 
+-------------------------+-----------+------------------+-------------------+ @@ -69,8 +66,6 @@ Feature Support and API Coverage +-------------------------+-----------+------------------+-------------------+ | BATCH | 26.0 % | 100.0 % | 58.97 % | +-------------------------+-----------+------------------+-------------------+ -| MULTI | 30.0 % | 100.0 % | 58.97 % | -+-------------------------+-----------+------------------+-------------------+ | HETERO | 30.0 % | 99.23 % | 58.97 % | +-------------------------+-----------+------------------+-------------------+ | || Percentage of API supported by the device, | diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst index aa4a2a984a3ca0..85e04b66bd7158 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst @@ -1,30 +1,14 @@ -Supported Models -======================== +AI Models verified for OpenVINO™ +===================================== -The following table lists a selection of models that are validated against various hardware -devices. The list includes only models used in validation, other models from frameworks supported -by OpenVINO may also work properly. +The following is a list of models that have been verified to work with OpenVINO. Note that other +models from OpenVINO-supported frameworks may also work properly but have not been tested. +**AI Models that run on Intel® Core Ultra™ Processors with OpenVINO™ toolkit:** .. raw:: html -
- - - .. csv-table:: @@ -34,16 +18,16 @@ by OpenVINO may also work properly. :file: ../../_static/download/supported_models.csv -| Note: -| The results as of June 17 2024, for OpenVINO version 2024.2. +Check marks indicate models that passed inference with no errors. Empty cells indicate models +that were not tested. No failing runs producing an error have been recorded. + +In the precision column, the "optimum-intel default" label corresponds to FP32 for small models +and INT8 for models greater than 1B parameters. -| The validation process involves using OpenVINO, natively or as a backend, to load each model - onto the designated hardware and execute inference. If no errors are reported and inference - finishes, the model receives the **passed** status (indicated by a check mark in the table). - The models that are not tested are indicated by **empty** status cells. -| The models come from different public model repositories, such as, OpenVINO Model Zoo, - ONNX Model Zoo, Pytorch Model Zoo, and HuggingFace. +| Note: +| The results as of June 17 2024, for OpenVINO version 2024.2. +| The models come from different public model repositories, such as Pytorch Model Zoo and + HuggingFace; they were executed on the designated hardware with OpenVINO either natively or + as a backend. -| In the precision column, the "optimum-intel default" label corresponds to FP32 for small - models and INT8 for models greater than 1B parameters. \ No newline at end of file diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index ced358db18c044..e884dd0b90370b 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -205,10 +205,10 @@ You can also test performance for your system yourself, following the guide on * Intel® Distribution of OpenVINO™ toolkit performance results are based on release - 2024.2, as of June 17, 2024. + 2024.2, as of June 28, 2024. * OpenVINO Model Server performance results are based on release - 2024.1, as of April 26, 2024. + 2024.2, as of June 28, 2024. The results may not reflect all publicly available updates. Intel technologies' features and benefits depend on system configuration and may require enabled hardware, software, or service @@ -236,4 +236,4 @@ for non-Intel products. Results may vary. For more information, see :doc:`F.A.Q. <./performance-benchmarks/performance-benchmarks-faq>` - See :doc:`Legal Information <./additional-resources/legal-information>`. \ No newline at end of file + See :doc:`Legal Information <./additional-resources/terms-of-use>`. \ No newline at end of file diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index f174b9a9fd1d3d..e7a4d69d13baac 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -1,64 +1,24 @@ Most Efficient Large Language Models for AI PC ============================================== -This page is regularly updated to help you identify the best-performing LLMs on the Intel® Core™ Ultra processor family and AI PCs. +This page is regularly updated to help you identify the best-performing LLMs on the +Intel® Core™ Ultra processor family and AI PCs. 
-The table below lists the key performance indicators for a selection of Large Language Models running on an Intel® Core™ Ultra 7-165H based system. +The table below lists the key performance indicators for a selection of Large Language Models, +running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. -For complete information on the system config, see: `Hardware Platforms [PDF] `__ .. raw:: html -

- - - - - - - - .. csv-table:: :class: modeldata stripe :name: supportedModelsTable :header-rows: 1 - :file: ../../_static/llm_models.csv - + :file: ../../_static/download/llm_models.csv +For complete information on the system config, see: `Hardware Platforms [PDF] `__ \ No newline at end of file diff --git a/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst b/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst index a3e414c7e3768d..d93e9b553cc12c 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst @@ -184,4 +184,4 @@ insights in the application-level performance on the timeline view. Results may vary. For more information, see :doc:`F.A.Q. <./performance-benchmarks-faq>` and :doc:`Platforms, Configurations, Methodology <../performance-benchmarks>`. - See :doc:`Legal Information <../additional-resources/legal-information>`. \ No newline at end of file + See :doc:`Legal Information <../additional-resources/terms-of-use>`. \ No newline at end of file diff --git a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst index 4c15d7ddc75499..710dc3b403e63f 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst @@ -293,4 +293,4 @@ accuracy for the model. Results may vary. For more information, see :doc:`F.A.Q. <./performance-benchmarks-faq>` and :doc:`Platforms, Configurations, Methodology <../performance-benchmarks>`. - See :doc:`Legal Information <../additional-resources/legal-information>`. \ No newline at end of file + See :doc:`Legal Information <../additional-resources/terms-of-use>`. \ No newline at end of file diff --git a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst index 6c731dc374010b..e8e099c5ae4b1f 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst @@ -133,8 +133,7 @@ Performance Information F.A.Q. .. dropdown:: Why are INT8 optimized models used for benchmarking on CPUs with no VNNI support? - The benefit of low-precision optimization using the OpenVINO™ - toolkit model optimizer extends beyond processors supporting VNNI + The benefit of low-precision optimization extends beyond processors supporting VNNI through Intel® DL Boost. The reduced bit width of INT8 compared to FP32 allows Intel® CPU to process the data faster. Therefore, it offers better throughput on any converted model, regardless of the @@ -174,6 +173,6 @@ Performance Information F.A.Q. .. container:: benchmark-banner - Results may vary. For more information, see - :doc:`Platforms, Configurations, Methodology <../performance-benchmarks>`. - See :doc:`Legal Information <../additional-resources/legal-information>`. \ No newline at end of file + Results may vary. For more information, see: + :doc:`Platforms, Configurations, Methodology <../performance-benchmarks>`, + :doc:`Legal Information <../additional-resources/terms-of-use>`. 
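The INT8-versus-FP32 throughput effect discussed in the F.A.Q. above can be reproduced informally with a short Python sketch. This is a rough, single-request illustration under assumptions: the IR paths are placeholders and the input is assumed to be a single static NCHW float tensor; the ``benchmark_app`` tool performs such measurements properly, with multiple asynchronous requests.

.. code-block:: python

    import time
    import numpy as np
    import openvino as ov
    import openvino.properties.hint as hints

    core = ov.Core()

    def measure_fps(xml_path, input_shape=(1, 3, 224, 224), n_iters=200):
        # Compile for CPU with the throughput hint and time synchronous inference.
        compiled = core.compile_model(
            xml_path, "CPU",
            {hints.performance_mode: hints.PerformanceMode.THROUGHPUT})
        request = compiled.create_infer_request()
        data = np.random.rand(*input_shape).astype(np.float32)
        start = time.perf_counter()
        for _ in range(n_iters):
            request.infer({0: data})
        return n_iters / (time.perf_counter() - start)

    print("FP32:", measure_fps("model_fp32.xml"))  # placeholder IR
    print("INT8:", measure_fps("model_int8.xml"))  # placeholder IR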
\ No newline at end of file diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst index 89d983941ff6f4..c8c32126e3e10c 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino.rst @@ -24,7 +24,8 @@ OpenVINO Release Notes What's new +++++++++++++++++++++++++++++ -* More Gen AI coverage and framework integrations to minimize code changes. +* More :doc:`Gen AI <../learn-openvino/llm_inference_guide/genai-guide>` coverage and framework + integrations to minimize code changes. * Llama 3 optimizations for CPUs, built-in GPUs, and discrete GPUs for improved performance and efficient memory usage. @@ -56,14 +57,13 @@ What's new Batching and PagedAttention, enabling significantly higher throughput for parallel inferencing, especially on Intel® Xeon® processors, when serving LLMs to many concurrent users. - * OpenVINO backend for Triton Server now supports built-in GPUs and discrete GPUs, in - addition to dynamic shapes support. + * OpenVINO backend for Triton Server now supports dynamic input shapes. * Integration of TorchServe through torch.compile OpenVINO backend for easy model deployment, provisioning to multiple instances, model versioning, and maintenance. - * Preview: addition of the Generate API, a simplified API for text generation using large language - models with only a few lines of code. The API is available through the newly launched - OpenVINO GenAI package. + * Preview: addition of the :doc:`Generate API <../learn-openvino/llm_inference_guide/genai-guide>`, + a simplified API for text generation using large language models with only a few lines of + code. The API is available through the newly launched OpenVINO GenAI package. * Support for Intel Atom® Processor X Series. For more details, see :doc:`System Requirements <./release-notes-openvino/system-requirements>`. * Preview: Support for Intel® Xeon® 6 processor. @@ -186,8 +186,8 @@ OpenVINO Model Server * OpenVINO Model server can be now used for text generation use cases using OpenAI compatible API. * Added support for continuous batching and PagedAttention algorithms for text generation with - fast and efficient in high concurrency load especially on Intel Xeon processors. Learn more - about it. + fast and efficient in high concurrency load especially on Intel Xeon processors. + `Learn more about it `__. Neural Network Compression Framework diff --git a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst index a02d133453c8b9..4e7a284dcb4bc5 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst @@ -30,6 +30,7 @@ CPU .. tab-item:: Supported Operating Systems + * Ubuntu 24.04 long-term support (LTS), 64-bit (Kernel 6.8+) * Ubuntu 22.04 long-term support (LTS), 64-bit (Kernel 5.15+) * Ubuntu 20.04 long-term support (LTS), 64-bit (Kernel 5.15+) * Ubuntu 18.04 long-term support (LTS) with limitations, 64-bit (Kernel 5.4+) @@ -59,6 +60,7 @@ GPU .. tab-item:: Supported Operating Systems + * Ubuntu 24.04 long-term support (LTS), 64-bit * Ubuntu 22.04 long-term support (LTS), 64-bit * Ubuntu 20.04 long-term support (LTS), 64-bit * Windows 10, 64-bit @@ -88,6 +90,7 @@ Intel® Neural Processing Unit .. 
tab-item:: Operating Systems for NPU + * Ubuntu 24.04 long-term support (LTS), 64-bit * Ubuntu 22.04 long-term support (LTS), 64-bit * Windows 11, 64-bit (22H2, 23H2) @@ -106,6 +109,7 @@ Operating systems and developer environment .. tab-item:: Linux OS + * Ubuntu 24.04 with Linux kernel 6.8+ * Ubuntu 22.04 with Linux kernel 5.15+ * Ubuntu 20.04 with Linux kernel 5.15+ * Red Hat Enterprise Linux 8 with Linux kernel 5.4 diff --git a/docs/articles_en/assets/images/quantized_convolution.png b/docs/articles_en/assets/images/quantized_convolution.png deleted file mode 100644 index 6ccb89816065a2..00000000000000 --- a/docs/articles_en/assets/images/quantized_convolution.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:71365e85be040eb01ed524e568b332d9bb6222c760686c54db4e754f587082c2 -size 31032 diff --git a/docs/articles_en/assets/images/quantized_model_example.png b/docs/articles_en/assets/images/quantized_model_example.png deleted file mode 100644 index d9a037779a756c..00000000000000 --- a/docs/articles_en/assets/images/quantized_model_example.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3f68e826cfac63d8e6f8d77aa5b7fc61957a872dfb09b38695fb481044a6ddd5 -size 48327 diff --git a/docs/articles_en/assets/snippets/compile_model_cpu.cpp b/docs/articles_en/assets/snippets/compile_model_cpu.cpp index be84b353cd740b..0e62824e6359e7 100644 --- a/docs/articles_en/assets/snippets/compile_model_cpu.cpp +++ b/docs/articles_en/assets/snippets/compile_model_cpu.cpp @@ -17,4 +17,12 @@ int main() { auto compiled_model = core.compile_model(model, "MULTI:CPU,GPU.0"); //! [compile_model_multi] } + + { + //! [compile_model_auto] + ov::Core core; + auto model = core.read_model("model.xml"); + auto compiled_model = core.compile_model(model, "AUTO:CPU,GPU.0", ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)); + //! [compile_model_auto] + } } diff --git a/docs/articles_en/assets/snippets/compile_model_cpu.py b/docs/articles_en/assets/snippets/compile_model_cpu.py index 22b5cb2a188914..7047376e8dd2e4 100644 --- a/docs/articles_en/assets/snippets/compile_model_cpu.py +++ b/docs/articles_en/assets/snippets/compile_model_cpu.py @@ -21,3 +21,8 @@ def main(): core = ov.Core() compiled_model = core.compile_model(model, "MULTI:CPU,GPU.0") #! [compile_model_multi] + + #! [compile_model_auto] + core = ov.Core() + compiled_model = core.compile_model(model, "AUTO:CPU,GPU.0", {hints.performance_mode: hints.PerformanceMode.CUMULATIVE_THROUGHPUT}) + #! [compile_model_auto] diff --git a/docs/articles_en/assets/snippets/compile_model_gpu.cpp b/docs/articles_en/assets/snippets/compile_model_gpu.cpp index 0cdb494989df42..8c1890e83bbed4 100644 --- a/docs/articles_en/assets/snippets/compile_model_gpu.cpp +++ b/docs/articles_en/assets/snippets/compile_model_gpu.cpp @@ -35,6 +35,14 @@ int main() { //! [compile_model_multi] } +{ + //! [compile_model_auto] + ov::Core core; + auto model = core.read_model("model.xml"); + auto compiled_model = core.compile_model(model, "AUTO:GPU.1,CPU.0", ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)); + //! [compile_model_auto] +} + { //! 
[compile_model_batch_plugin] ov::Core core; diff --git a/docs/articles_en/assets/snippets/compile_model_gpu.py b/docs/articles_en/assets/snippets/compile_model_gpu.py index 733f162dbc1119..c90fd351b9524c 100644 --- a/docs/articles_en/assets/snippets/compile_model_gpu.py +++ b/docs/articles_en/assets/snippets/compile_model_gpu.py @@ -33,6 +33,11 @@ def main(): compiled_model = core.compile_model(model, "MULTI:GPU.1,GPU.0") #! [compile_model_multi] + #! [compile_model_auto] + core = ov.Core() + compiled_model = core.compile_model(model, "AUTO:GPU.1,CPU.0", {hints.performance_mode: hints.PerformanceMode.CUMULATIVE_THROUGHPUT}) + #! [compile_model_auto] + #! [compile_model_batch_plugin] core = ov.Core() compiled_model = core.compile_model(model, "BATCH:GPU") diff --git a/docs/articles_en/assets/snippets/compile_model_npu.cpp b/docs/articles_en/assets/snippets/compile_model_npu.cpp new file mode 100644 index 00000000000000..e4fb38437bac1e --- /dev/null +++ b/docs/articles_en/assets/snippets/compile_model_npu.cpp @@ -0,0 +1,12 @@ +#include + +int main() { +{ + //! [compile_model_default_npu] + ov::Core core; + auto model = core.read_model("model.xml"); + auto compiled_model = core.compile_model(model, "NPU"); + //! [compile_model_default_npu] +} + return 0; +} diff --git a/docs/articles_en/assets/snippets/compile_model_npu.py b/docs/articles_en/assets/snippets/compile_model_npu.py new file mode 100644 index 00000000000000..d4b4e4d90df40d --- /dev/null +++ b/docs/articles_en/assets/snippets/compile_model_npu.py @@ -0,0 +1,18 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import openvino as ov +from snippets import get_model + + +def main(): + model = get_model() + + core = ov.Core() + if "NPU" not in core.available_devices: + return 0 + + #! [compile_model_default_npu] + core = ov.Core() + compiled_model = core.compile_model(model, "NPU") + #! [compile_model_default_npu] diff --git a/docs/articles_en/assets/snippets/ov_hetero.cpp b/docs/articles_en/assets/snippets/ov_hetero.cpp index 791340afff56ef..2c17de269961bf 100644 --- a/docs/articles_en/assets/snippets/ov_hetero.cpp +++ b/docs/articles_en/assets/snippets/ov_hetero.cpp @@ -53,5 +53,13 @@ auto compiled_model = core.compile_model(model, "HETERO", ); //! [configure_fallback_devices] } + +{ +//! [set_pipeline_parallelism] +std::set model_policy = {ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL}; +auto compiled_model = + core.compile_model(model, "HETERO:GPU.1,GPU.2", ov::hint::model_distribution_policy(model_policy)); +//! [set_pipeline_parallelism] +} return 0; } diff --git a/docs/articles_en/assets/snippets/ov_hetero.py b/docs/articles_en/assets/snippets/ov_hetero.py index 7f338081f69c48..dc46dea0dbfb6a 100644 --- a/docs/articles_en/assets/snippets/ov_hetero.py +++ b/docs/articles_en/assets/snippets/ov_hetero.py @@ -53,3 +53,15 @@ def main(): core.set_property("CPU", {hints.inference_precision: ov.Type.f32}) compiled_model = core.compile_model(model=model, device_name="HETERO") #! [configure_fallback_devices] + + #! [set_pipeline_parallelism] + import openvino.properties.hint as hints + + compiled_model = core.compile_model( + model, + device_name="HETERO:GPU.1,GPU.2", + config={ + hints.model_distribution_policy: + "PIPELINE_PARALLEL" + }) + #! 
[set_pipeline_parallelism] diff --git a/docs/articles_en/documentation/legacy-features.rst b/docs/articles_en/documentation/legacy-features.rst index 489e6e77fe4191..5c9095e4e9e972 100644 --- a/docs/articles_en/documentation/legacy-features.rst +++ b/docs/articles_en/documentation/legacy-features.rst @@ -11,6 +11,7 @@ Legacy Features and Components OpenVINO Development Tools package Model Optimizer / Conversion API Open Model ZOO + legacy-features/multi-device Since OpenVINO has grown very rapidly in recent years, a number of its features @@ -63,7 +64,16 @@ offering. become the recommended model source for OpenVINO. - +| **Multi-Device Execution** +| *New solution:* Automatic Device Selection +| *Old solution:* Legacy Multi-Device Execution discontinuation planned for OpenVINO 2025.0 +| +| The behavior and results of the Multi-Device Execution mode are covered by the ``CUMULATIVE_THROUGHPUT`` + option of the Automatic Device Selection. The only difference is that ``CUMULATIVE_THROUGHPUT`` uses + the devices specified by AUTO, which means that adding devices manually is not mandatory, + while with MULTI, the devices had to be specified before the inference. +| :doc:`Check the Automatic Device Selection <../openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection>` +| :doc:`Check the legacy solution ` Discontinued: ############# diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/multi-device.rst b/docs/articles_en/documentation/legacy-features/multi-device.rst similarity index 86% rename from docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/multi-device.rst rename to docs/articles_en/documentation/legacy-features/multi-device.rst index 6771f834a5be03..77b75e8444d260 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/multi-device.rst +++ b/docs/articles_en/documentation/legacy-features/multi-device.rst @@ -9,11 +9,13 @@ Multi-device execution multiple available computing devices to particular inference requests to execute in parallel. +.. danger:: -To run inference on multiple devices, you can choose either of the following ways: + The Multi-device execution mode described here has been **deprecated**. -- Use the :ref:`CUMULATIVE_THROUGHPUT option ` of the Automatic Device Selection mode. This way, you can use all available devices in the system without the need to specify them. -- Use the Multi-Device execution mode. It shares the same behaviors as the :ref:`CUMULATIVE_THROUGHPUT option ` of the Automatic Device Selection mode. The difference is, it needs or ``ov::device::priorities`` to be set explicitly. + It's functionality is now fully covered by the :ref:`CUMULATIVE_THROUGHPUT ` + option of the :doc:`Automatic Device Selection <../../openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection>` mode. + This way, all available devices in the system can be used without the need to specify them. How MULTI Works #################### @@ -49,7 +51,8 @@ Following the OpenVINO™ naming convention, the Multi-Device mode is assigned t Specifying the device list explicitly is required by MULTI, as it defines the devices available for inference and sets their priorities. -Note that OpenVINO™ Runtime enables you to use “GPU” as an alias for “GPU.0” in function calls. More details on enumerating devices can be found in :doc:`Inference Devices and Modes <../inference-devices-and-modes>`. 
+Note that OpenVINO™ Runtime enables you to use “GPU” as an alias for “GPU.0” in function calls. +More details on enumerating devices can be found in :doc:`Inference Devices and Modes <../../openvino-workflow/running-inference/inference-devices-and-modes>`. The following commands are accepted by the API: @@ -70,7 +73,7 @@ The following commands are accepted by the API: :fragment: [part0] -To check what devices are present in the system, you can use the Device API. For information on how to do it, check :doc:`Query device properties and configuration `. +To check what devices are present in the system, you can use the Device API. For information on how to do it, check :doc:`Query device properties and configuration <../../openvino-workflow/running-inference/inference-devices-and-modes/query-device-properties>`. Configuring Individual Devices and Creating the Multi-Device On Top @@ -101,7 +104,7 @@ Querying the Optimal Number of Inference Requests +++++++++++++++++++++++++++++++++++++++++++++++++ When using MULTI, you don't need to sum over included devices yourself, you can query the optimal number of requests directly, -using the :doc:`configure devices ` property: +using the :doc:`configure devices <../../openvino-workflow/running-inference/inference-devices-and-modes/query-device-properties>` property: .. tab-set:: @@ -148,7 +151,7 @@ For best performance when using the MULTI execution mode you should consider a f Additional Resources #################### -- :doc:`Inference Devices and Modes <../inference-devices-and-modes>` -- :doc:`Automatic Device Selection ` +- :doc:`Inference Devices and Modes <../../openvino-workflow/running-inference/inference-devices-and-modes>` +- :doc:`Automatic Device Selection <../../openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection>` diff --git a/docs/articles_en/documentation/openvino-extensibility/frontend-extensions.rst b/docs/articles_en/documentation/openvino-extensibility/frontend-extensions.rst index 78777e777fb3a6..013b2eb9fbc54f 100644 --- a/docs/articles_en/documentation/openvino-extensibility/frontend-extensions.rst +++ b/docs/articles_en/documentation/openvino-extensibility/frontend-extensions.rst @@ -68,16 +68,14 @@ to OpenVINO template extension ``Identity`` class. The mapping doesn’t involve any attributes, as operation Identity doesn’t have them. Extension objects, like just constructed ``extension`` can be used to add to the -OpenVINO runtime just before the loading a model that contains custom operations: +OpenVINO runtime just before loading a model that contains custom operations: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_extensions.cpp :language: cpp :fragment: [frontend_extension_read_model] -Or extensions can be constructed in a separately compiled shared library. -Separately compiled library can be used in Model Optimizer or ``benchmark_app``. -Read about how to build and load such a library in the chapter of “Create library with extensions” in -:doc:`Introduction to OpenVINO Extension <../openvino-extensibility>`. +However, extensions can also be constructed in a separately compiled shared library, that is suitable for loading models with custom operations in a Python application or tools like ``benchmark_app``. +For details on how to build and load such library, check the following :ref:`guide `. If operation have multiple inputs and/or outputs they will be mapped in order. The type of elements in input/output tensors should match expected types in the surrounding operations. 
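As a hedged sketch of the library-based flow described above, the extension library is loaded into the ``Core`` before reading the model. The library file name and model path below are assumptions; use the artifacts produced by your own build.

.. code-block:: python

    import openvino as ov

    core = ov.Core()
    # Hypothetical path to a separately compiled extension library.
    core.add_extension("libopenvino_template_extension.so")

    # The model containing the custom operation can now be read and compiled.
    model = core.read_model("model_with_custom_op.xml")
    compiled = core.compile_model(model, "CPU")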
diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models.rst index fadd846af973dc..d44dc20d1e0b5a 100644 --- a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models.rst +++ b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models.rst @@ -7,7 +7,6 @@ Quantized models compute and restrictions :maxdepth: 1 :hidden: - quantized-models/low-precision-model-representation .. meta:: :description: Learn about the support for quantized models with different @@ -16,8 +15,7 @@ Quantized models compute and restrictions One of the feature of OpenVINO is the support of quantized models with different precisions: INT8, INT4, etc. However, it is up to the plugin to define what exact precisions are supported by the particular HW. -All quantized models which can be expressed in IR have a unified representation by means of *FakeQuantize* operation. -For more details about low-precision model representation please refer to this :doc:`document `. + Interpreting FakeQuantize at runtime #################################### diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models/low-precision-model-representation.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models/low-precision-model-representation.rst deleted file mode 100644 index abecc2cfa8f580..00000000000000 --- a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models/low-precision-model-representation.rst +++ /dev/null @@ -1,35 +0,0 @@ -.. {#openvino_docs_ie_plugin_dg_lp_representation} - -Representation of low-precision models -====================================== - -The goal of this document is to describe how optimized models are represented in OpenVINO Intermediate Representation (IR) and provide guidance -on interpretation rules for such models at runtime. - -Currently, there are two groups of optimization methods that can influence on the IR after applying them to the full-precision model: - -- **Sparsity**. It is represented by zeros inside the weights and this is up to the hardware plugin how to interpret these zeros - (use weights as is or apply special compression algorithms and sparse arithmetic). No additional mask is provided with the model. -- **Quantization**. The rest of this document is dedicated to the representation of quantized models. - -Representation of quantized models -################################### - -The OpenVINO Toolkit represents all the quantized models using the so-called FakeQuantize operation (see the description in -:doc:`this document <../../../../openvino-ir-format/operation-sets/operation-specs/quantization/fake-quantize-1>`). This operation is very expressive and allows mapping values from -arbitrary input and output ranges. The whole idea behind that is quite simple: we project (discretize) the input values to the low-precision -data type using affine transformation (with clamp and rounding) and then reproject discrete values back to the original range and data type. -It can be considered as an emulation of the quantization process which happens at runtime. 
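To make the emulation described above concrete, here is a small NumPy sketch of the quantize-dequantize mapping performed by FakeQuantize. Parameter names are illustrative; the FakeQuantize operation specification remains the authoritative definition.

.. code-block:: python

    import numpy as np

    def fake_quantize(x, in_low, in_high, out_low, out_high, levels):
        # Clamp to the input range, discretize into `levels` steps,
        # then project the discrete values back to the output range.
        x = np.clip(x, in_low, in_high)
        q = np.round((x - in_low) / (in_high - in_low) * (levels - 1))
        return q / (levels - 1) * (out_high - out_low) + out_low

    # Example: emulate 8-bit quantization of activations in the range [0, 6].
    acts = np.array([0.1, 1.7, 3.14, 5.9], dtype=np.float32)
    print(fake_quantize(acts, 0.0, 6.0, 0.0, 6.0, levels=256))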
-In order to be able to execute a particular DL operation in low-precision all its inputs should be quantized i.e. should have FakeQuantize -between operation and data blobs. The figure below shows an example of quantized Convolution which contains two FakeQuantize nodes: one for -weights and one for activations (bias is quantized using the same parameters). - -.. image:: ../../../../../assets/images/quantized_convolution.png - - -Starting from OpenVINO 2020.2 release all the quantized models are represented in the compressed form. It means that the weights -of low-precision operations are converted into the target precision (e.g. INT8). It helps to substantially reduce the model size. -The rest of the parameters can be represented in FLOAT32 or FLOAT16 precision depending on the input full-precision model used in -the quantization process. Fig. 2 below shows an example of the part of the compressed IR. - -.. image:: ../../../../../assets/images/quantized_model_example.png diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/compiled-model.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/compiled-model.rst index d3c7d5d9c3e5a7..4bdbfa0ab4a189 100644 --- a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/compiled-model.rst +++ b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/compiled-model.rst @@ -8,19 +8,20 @@ Compiled Model :description: Use the ov::CompiledModel class as the base class for a compiled model and to create an arbitrary number of ov::InferRequest objects. -ov::CompiledModel class functionality: +``ov::CompiledModel`` class functionality: -* Compile an ov::Model instance to a backend specific graph representation -* Create an arbitrary number of ov::InferRequest objects -* Hold some common resources shared between different instances of ov::InferRequest. For example: +* Compile an ``ov::Model`` instance to a backend specific graph representation +* Create an arbitrary number of ``ov::InferRequest`` objects +* Hold some common resources shared between different instances of ``ov::InferRequest``. For example: - * ov::ICompiledModel::m_task_executor task executor to implement asynchronous execution - * ov::ICompiledModel::m_callback_executor task executor to run an asynchronous inference request callback in a separate thread + * ``ov::ICompiledModel::m_task_executor`` task executor to implement asynchronous execution + * ``ov::ICompiledModel::m_callback_executor`` task executor to run an asynchronous inference request callback in a separate thread CompiledModel Class ################### -OpenVINO Plugin API provides the interface ov::ICompiledModel which should be used as a base class for a compiled model. Based on that, a declaration of an compiled model class can look as follows: +OpenVINO Plugin API provides the interface ``ov::ICompiledModel`` which should be used as a base class +for a compiled model. Based on that, a declaration of an compiled model class can look as follows: .. doxygensnippet:: src/plugins/template/src/compiled_model.hpp :language: cpp @@ -32,9 +33,12 @@ Class Fields The example class has several fields: -* ``m_request_id`` - Tracks a number of created inference requests, which is used to distinguish different inference requests during profiling via the Intel® Instrumentation and Tracing Technology (ITT) library. 
+* ``m_request_id`` - Tracks a number of created inference requests, which is used to distinguish + different inference requests during profiling via the Intel® Instrumentation and Tracing Technology (ITT) library. * ``m_cfg`` - Defines a configuration a compiled model was compiled with. -* ``m_model`` - Keeps a reference to transformed ``ov::Model`` which is used in OpenVINO reference backend computations. Note, in case of other backends with backend specific graph representation ``m_model`` has different type and represents backend specific graph or just a set of computational kernels to perform an inference. +* ``m_model`` - Keeps a reference to transformed ``ov::Model`` which is used in OpenVINO reference + backend computations. Note, in case of other backends with backend specific graph representation + ``m_model`` has different type and represents backend specific graph or just a set of computational kernels to perform an inference. * ``m_loaded_from_cache`` - Allows to understand that model was loaded from cache. CompiledModel Constructor @@ -51,7 +55,11 @@ The implementation ``compile_model()`` is fully device-specific. compile_model() +++++++++++++++ -The function accepts a const shared pointer to ``ov::Model`` object and applies OpenVINO passes using ``transform_model()`` function, which defines plugin-specific conversion pipeline. To support low precision inference, the pipeline can include Low Precision Transformations. These transformations are usually hardware specific. You can find how to use and configure Low Precisions Transformations in :doc:`Low Precision Transformations ` guide. +The function accepts a const shared pointer to ``ov::Model`` object and applies OpenVINO passes +using ``transform_model()`` function, which defines plugin-specific conversion pipeline. To support +low precision inference, the pipeline can include Low Precision Transformations. These +transformations are usually hardware specific. You can find how to use and configure Low Precisions +Transformations in :doc:`Low Precision Transformations ` guide. .. doxygensnippet:: src/plugins/template/src/compiled_model.cpp :language: cpp @@ -65,7 +73,8 @@ The function accepts a const shared pointer to ``ov::Model`` object and applies export_model() ++++++++++++++ -The implementation of the method should write all data to the ``model_stream``, which is required to import a backend specific graph later in the ``Plugin::import_model`` method: +The implementation of the method should write all data to the ``model_stream``, which is required +to import a backend specific graph later in the ``Plugin::import_model`` method: .. doxygensnippet:: src/plugins/template/src/compiled_model.cpp :language: cpp @@ -80,14 +89,25 @@ The method creates an synchronous inference request and returns it. :language: cpp :fragment: [compiled_model:create_sync_infer_request] -While the public OpenVINO API has a single interface for inference request, which can be executed in synchronous and asynchronous modes, a plugin library implementation has two separate classes: +While the public OpenVINO API has a single interface for inference request, which can be executed +in synchronous and asynchronous modes, a plugin library implementation has two separate classes: * :doc:`Synchronous inference request `, which defines pipeline stages and runs them synchronously in the ``infer`` method. -* :doc:`Asynchronous inference request `, which is a wrapper for a synchronous inference request and can run a pipeline asynchronously. 
Depending on a device pipeline structure, it can have one or several stages: - - * For single-stage pipelines, there is no need to define this method and create a class derived from ov::IAsyncInferRequest. For single stage pipelines, a default implementation of this method creates ov::IAsyncInferRequest wrapping a synchronous inference request and runs it asynchronously in the ``m_request_executor`` executor. - * For pipelines with multiple stages, such as performing some preprocessing on host, uploading input data to a device, running inference on a device, or downloading and postprocessing output data, schedule stages on several task executors to achieve better device use and performance. You can do it by creating a sufficient number of inference requests running in parallel. In this case, device stages of different inference requests are overlapped with preprocessing and postprocessing stage giving better performance. +* :doc:`Asynchronous inference request `, which is a wrapper for a synchronous + inference request and can run a pipeline asynchronously. Depending on a device pipeline structure, + it can have one or several stages: + + * For single-stage pipelines, there is no need to define this method and create a class derived + from ``ov::IAsyncInferRequest``. For single stage pipelines, a default implementation of this + method creates ``ov::IAsyncInferRequest`` wrapping a synchronous inference request and runs + it asynchronously in the ``m_request_executor`` executor. + * For pipelines with multiple stages, such as performing some preprocessing on host, uploading + input data to a device, running inference on a device, or downloading and postprocessing output + data, schedule stages on several task executors to achieve better device use and performance. + You can do it by creating a sufficient number of inference requests running in parallel. + In this case, device stages of different inference requests are overlapped with preprocessing + and postprocessing stage giving better performance. .. important:: diff --git a/docs/articles_en/documentation/openvino-extensibility/transformation-api.rst b/docs/articles_en/documentation/openvino-extensibility/transformation-api.rst index c179e628f6d0db..840b93adddc75f 100644 --- a/docs/articles_en/documentation/openvino-extensibility/transformation-api.rst +++ b/docs/articles_en/documentation/openvino-extensibility/transformation-api.rst @@ -18,14 +18,17 @@ Overview of Transformations API transformation-api/graph-rewrite-pass transformation-api/patterns-python-api -OpenVINO Transformation mechanism allows to develop transformation passes to modify ``ov::Model``. You can use this mechanism to apply additional optimizations to the original Model or transform unsupported subgraphs and operations to new operations supported by the plugin. +OpenVINO Transformation mechanism allows to develop transformation passes to modify ``ov::Model``. +You can use this mechanism to apply additional optimizations to the original Model or transform +unsupported subgraphs and operations to new operations supported by the plugin. This guide contains all the necessary information to start implementing OpenVINO™ transformations. Working with Model ################## Before moving to the transformation part, it is important to say a few words about the functions which allow modifying ``ov::Model``. 
-This section extends the :doc:`model representation guide <../../openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation>` and introduces an API for ``ov::Model`` manipulation. +This section extends the :doc:`model representation guide <../../openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation>` +and introduces an API for ``ov::Model`` manipulation. Working with node input and output ports ++++++++++++++++++++++++++++++++++++++++ @@ -33,7 +36,8 @@ Working with node input and output ports Each OpenVINO operation has ``ov::Node`` input and output ports, except for ``Parameter`` and ``Constant`` types. The terms ``node`` and ``operation`` are used interchangeably in OpenVINO, but this article will maintain consistency in their use. -Every port is associated with a node, allowing access to the node it belongs to, including its shape, type, all consumers for output ports and the producer node for input ports. +Every port is associated with a node, allowing access to the node it belongs to, including +its shape, type, all consumers for output ports and the producer node for input ports. Take a look at the code example: @@ -44,7 +48,8 @@ Take a look at the code example: Node replacement ++++++++++++++++ -OpenVINO™ provides two ways for node replacement: via OpenVINO™ helper function and directly via port methods. We are going to review both of them. +OpenVINO™ provides two ways for node replacement: via OpenVINO™ helper function and directly +via port methods. We are going to review both of them. Let's start with OpenVINO™ helper functions. The most popular function is ``ov::replace_node(old_node, new_node)``. @@ -56,7 +61,8 @@ Let's review a replacement case where a Negative operation is replaced with Mult :language: cpp :fragment: [ov:replace_node] -``ov::replace_node`` has a constraint that number of output ports for both nodes must be the same. Otherwise, the attempt to replace the nodes will result in an exception. +``ov::replace_node`` has a constraint that number of output ports for both nodes must be the same. +Otherwise, the attempt to replace the nodes will result in an exception. The alternative way to do the same replacement is the following: @@ -109,7 +115,8 @@ Transformation conditional compilation Transformation library has two internal macros to support conditional compilation feature. -* ``MATCHER_SCOPE(region)`` - allows to disable the MatcherPass if matcher isn't used. The region name should be unique. This macro creates a local variable ``matcher_name`` which you should use as a matcher name. +* ``MATCHER_SCOPE(region)`` - allows to disable the MatcherPass if matcher isn't used. The region + name should be unique. This macro creates a local variable ``matcher_name`` which you should use as a matcher name. * ``RUN_ON_MODEL_SCOPE(region)`` - allows to disable run_on_model pass if it isn't used. The region name should be unique. .. _transformation_writing_essentials: @@ -122,22 +129,29 @@ To develop a transformation, follow these transformation rules: 1. Friendly Names +++++++++++++++++ -Each ``ov::Node`` has a unique name and a friendly name. In transformations, only the friendly name matters because it represents the name from the model's perspective. -To prevent losing the friendly name when replacing a node with another node or a subgraph, the original friendly name is set to the last node in the replacing subgraph. See the example below. 
+Each ``ov::Node`` has a unique name and a friendly name. In transformations, only the friendly +name matters because it represents the name from the model's perspective. +To prevent losing the friendly name when replacing a node with another node or a subgraph, +the original friendly name is set to the last node in the replacing subgraph. See the example below. .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp :language: cpp :fragment: [ov:replace_friendly_name] -In more complicated cases, when a replaced operation has several outputs and additional consumers are added to its outputs, the decision on how to set the friendly name is determined by an agreement. +In more complicated cases, when a replaced operation has several outputs and additional +consumers are added to its outputs, the decision on how to set the friendly name is determined by an agreement. 2. Runtime Info +++++++++++++++ -Runtime info is a map ``std::map`` located inside the ``ov::Node`` class. It represents additional attributes of the ``ov::Node``. -These attributes, which can be set by users or plugins, need to be preserved when executing a transformation that changes ``ov::Model``, as they are not automatically propagated. -In most cases, transformations have the following types: 1:1 (replace node with another node), 1:N (replace node with a sub-graph), N:1 (fuse sub-graph into a single node), N:M (any other transformation). -Currently, there is no mechanism that automatically detects transformation types, so this runtime information needs to be propagated manually. See the example below: +Runtime info is a map ``std::map`` located inside the ``ov::Node`` class. +It represents additional attributes of the ``ov::Node``. +These attributes, which can be set by users or plugins, need to be preserved when executing +a transformation that changes ``ov::Model``, as they are not automatically propagated. +In most cases, transformations have the following types: 1:1 (replace node with another node), +1:N (replace node with a sub-graph), N:1 (fuse sub-graph into a single node), N:M (any other transformation). +Currently, there is no mechanism that automatically detects transformation types, so this +runtime information needs to be propagated manually. See the example below: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp @@ -146,12 +160,16 @@ Currently, there is no mechanism that automatically detects transformation types When a transformation has multiple fusions or decompositions, ``ov::copy_runtime_info`` must be called multiple times for each case. -.. note:: ``copy_runtime_info`` removes ``rt_info`` from destination nodes. If you want to keep it, specify them in source nodes as following: ``copy_runtime_info({a, b, c}, {a, b})`` +.. note:: + + ``copy_runtime_info`` removes ``rt_info`` from destination nodes. If you want to keep it, + specify them in source nodes as following: ``copy_runtime_info({a, b, c}, {a, b})`` 3. Constant Folding +++++++++++++++++++ -If your transformation inserts constant sub-graphs that need to be folded, do not forget to use ``ov::pass::ConstantFolding()`` after your transformation or call constant folding directly for operation. +If your transformation inserts constant sub-graphs that need to be folded, do not forget +to use ``ov::pass::ConstantFolding()`` after your transformation or call constant folding directly for operation. The example below shows how constant subgraph can be constructed. .. 
doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp @@ -174,8 +192,12 @@ Common mistakes in transformations In transformation development process: * Do not use deprecated OpenVINO™ API. Deprecated methods are marked with ``OPENVINO_DEPRECATED`` macro in their definition. -* Do not pass ``shared_ptr`` as input for another node if the type of the node is unknown or if it has multiple outputs. Instead, use explicit output ports. -* If you replace a node with another node that produces different shape, note that the new shape will not be propagated until the first ``validate_nodes_and_infer_types`` call for ``ov::Model``. If you are using ``ov::pass::Manager``, it will automatically call this method after each transformation execution. +* Do not pass ``shared_ptr`` as input for another node if the type of the node is unknown + or if it has multiple outputs. Instead, use explicit output ports. +* If you replace a node with another node that produces different shape, note that + the new shape will not be propagated until the first ``validate_nodes_and_infer_types`` + call for ``ov::Model``. If you are using ``ov::pass::Manager``, it will automatically call + this method after each transformation execution. * Do not forget to call the ``ov::pass::ConstantFolding`` pass if your transformation creates constant subgraphs. * Use latest OpSet if you are not developing downgrade transformation pass. * When developing a callback for ``ov::pass::MatcherPass``, do not change nodes that come after the root node in the topological order. @@ -185,9 +207,11 @@ In transformation development process: Using pass manager ################## -``ov::pass::Manager`` is a container class that can store a list of transformations and execute them. The main idea of this class is to have a high-level representation for grouped list of transformations. -It can register and apply any `transformation pass <#transformations_types>`__ on a model. -In addition, ``ov::pass::Manager`` has extended debug capabilities (find more information in the `how to debug transformations <#how-to-debug-transformations>`__ section). +``ov::pass::Manager`` is a container class that can store a list of transformations and execute them. +The main idea of this class is to have a high-level representation for grouped list of transformations. +It can register and apply any `transformation pass <#transformations-types>`__ on a model. +In addition, ``ov::pass::Manager`` has extended debug capabilities (find more information +in the `how to debug transformations <#how-to-debug-transformations>`__ section). The example below shows basic usage of ``ov::pass::Manager`` @@ -206,7 +230,8 @@ Another example shows how multiple matcher passes can be united into single Grap How to debug transformations ############################ -If you are using ``ov::pass::Manager`` to run sequence of transformations, you can get additional debug capabilities by using the following environment variables: +If you are using ``ov::pass::Manager`` to run sequence of transformations, you can get +additional debug capabilities by using the following environment variables: .. code-block:: cpp @@ -214,7 +239,9 @@ If you are using ``ov::pass::Manager`` to run sequence of transformations, you c OV_ENABLE_VISUALIZE_TRACING=1 - enables visualization after each transformation. By default, it saves dot and svg files. -.. note:: Make sure that you have dot installed on your machine; otherwise, it will silently save only dot file without svg file. +.. 
note:: + + Make sure that you have dot installed on your machine; otherwise, it will silently save only dot file without svg file. See Also ######## diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst index 2aa4a1c0a7d558..46706916ba5a2d 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst @@ -12,8 +12,7 @@ opset1 This specification document describes ``opset1`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset1``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset1``. Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst index c40d5f494798e4..feb3af0956f5f8 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst @@ -12,8 +12,7 @@ opset10 This specification document describes the ``opset10`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset10``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset10``. Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst index 14049ddbb7d369..20b76ab84ced0c 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst @@ -12,8 +12,7 @@ opset11 This specification document describes the ``opset11`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset11``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset11``. 
Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst index 23151563d5bb1b..a277eaebb936fc 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst @@ -12,8 +12,7 @@ opset12 This specification document describes the ``opset12`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset12``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset12``. Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst index 5488a1bcd52340..578ab78105772f 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst @@ -12,8 +12,7 @@ opset13 This specification document describes the ``opset13`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset13``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset13``. Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst index b06528d3b27dea..d582e655243efb 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst @@ -12,8 +12,7 @@ opset14 This specification document describes the ``opset14`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset14``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset14``. 
Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst index 234c4815c24bbb..e189bd831274d8 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst @@ -12,8 +12,7 @@ opset2 This specification document describes ``opset2`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset2``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset2``. Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst index 42556d6b18e221..5c321bae1a04dc 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst @@ -12,8 +12,7 @@ opset3 This specification document describes ``opset3`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset3``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset3``. Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset4.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset4.rst index c28fc85578d14b..fd603a2cd8e839 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset4.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset4.rst @@ -12,8 +12,7 @@ opset4 This specification document describes ``opset4`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset4``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset4``. 
Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst index e3189cc701fcf5..0d2c6fabfd1648 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst @@ -12,8 +12,7 @@ opset5 This specification document describes ``opset5`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset5``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset5``. Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst index b00c2dc9553bbb..e0d900002c2109 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst @@ -12,8 +12,7 @@ opset6 This specification document describes ``opset6`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset6``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset6``. Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst index 5232f8ba1d395d..415bc2109f7c23 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst @@ -12,8 +12,7 @@ opset7 This specification document describes the ``opset7`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset7``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset7``. 
Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst index cbf52e53a4dc95..0eb801911ba467 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst @@ -12,8 +12,7 @@ opset8 This specification document describes the ``opset8`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset8``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset8``. Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst index 70ba9a6fa10757..6f33b798865a68 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst @@ -12,8 +12,7 @@ opset9 This specification document describes the ``opset9`` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes -declared in ``namespace opset9``. +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset9``. Table of Contents diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/movement/scatter-nd-update-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/movement/scatter-nd-update-15.rst index 283779d6368caa..794fe707695ff8 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/movement/scatter-nd-update-15.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/movement/scatter-nd-update-15.rst @@ -16,7 +16,7 @@ ScatterNDUpdate **Detailed description**: The operation produces a copy of ``data`` tensor and updates its value using logic from ``reduction`` attribute, using values specified by ``updates`` at specific index positions specified by ``indices``. The output shape is the same as the shape of ``data``. -Input ``indices`` can contain duplicated index values, however, in case when *reduction* is set to ``none``, only last update for given duplicated index is used. +If multiple indices point to the same output location then the order of updating the values is undefined. The last dimension of ``indices`` corresponds to indices into elements if ``indices.shape[-1]`` = ``data.shape.rank`` or slices if ``indices.shape[-1]`` < ``data.shape.rank``. 
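The element-versus-slice distinction can be sketched with plain NumPy, using the same indexing pattern as the reference snippet that follows (an illustrative sketch only; the shapes and values here are made up):

.. code-block:: py

    import numpy as np

    data = np.arange(12).reshape(3, 4)          # rank r = 2

    # indices.shape[-1] == r: each index entry addresses a single element.
    elem_indices = np.array([[0, 1], [2, 3]])
    elem_updates = np.array([100, 200])
    out = data.copy()
    for ndidx in np.ndindex(elem_indices.shape[:-1]):
        out[tuple(elem_indices[ndidx])] = elem_updates[ndidx]      # elements (0, 1) and (2, 3)

    # indices.shape[-1] < r: each index entry addresses a slice over the remaining axes.
    slice_indices = np.array([[0], [2]])
    slice_updates = np.array([[10, 11, 12, 13], [20, 21, 22, 23]])
    out2 = data.copy()
    for ndidx in np.ndindex(slice_indices.shape[:-1]):
        out2[tuple(slice_indices[ndidx])] = slice_updates[ndidx]   # whole rows 0 and 2

    print(out)
    print(out2)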
@@ -41,8 +41,9 @@ Operator ScatterNDUpdate-15 is an equivalent to following NumPy snippet: elif reduction == "min": func = min out = np.copy(data) + # Order of loop iteration is undefined. for ndidx in np.ndindex(indices.shape[:-1]): - out[indices[ndidx]] = func(out[indices[ndidx]], updates[ndidx]) + out[tuple(indices[ndidx])] = func(out[tuple(indices[ndidx])], updates[ndidx]) return out Example 1 that shows simple case of update with *reduction* set to ``none``.: @@ -52,7 +53,7 @@ Example 1 that shows simple case of update with *reduction* set to ``none``.: data = [1, 2, 3, 4, 5, 6, 7, 8] indices = [[4], [3], [1], [7], [-2], [-4]] updates = [9, 10, 11, 12, 13, 14] - output = [1, 11, 3, 10, 14, 6, 13, 12] + output = [1, 11, 3, 10, 4, 6, 13, 12] Example that shows update of two slices of ``4x4`` shape in ``data``, with *reduction* set to ``none``: @@ -87,7 +88,7 @@ Example that shows update of two slices of ``4x4`` shape in ``data``, with *redu * **1**: ``data`` tensor of arbitrary rank ``r`` >= 1 and of type *T*. **Required.** -* **2**: ``indices`` tensor with indices of arbitrary rank ``q`` >= 1 and of type *T_IND*. All index values ``i_j`` in index entry ``(i_0, i_1, ...,i_k)`` (where ``k = indices.shape[-1]``) must be within bounds ``[-s_j, s_j - 1]`` where ``s_j = data.shape[j]``. ``k`` must be at most ``r``. If multiple indices point to the same output location then values will be updated in order of their occurrence. Negative value of index means reverse indexing and will be normalized to value ``len(data.shape[j] + index)``. If an index points to non-existing element then exception is raised. **Required.** +* **2**: ``indices`` tensor with indices of arbitrary rank ``q`` >= 1 and of type *T_IND*. All index values ``i_j`` in index entry ``(i_0, i_1, ...,i_k)`` (where ``k = indices.shape[-1]``) must be within bounds ``[-s_j, s_j - 1]`` where ``s_j = data.shape[j]``. ``k`` must be at most ``r``. If multiple indices point to the same output location then the order of updating the values is undefined. Negative value of index means reverse indexing and will be normalized to value ``len(data.shape[j] + index)``. If an index points to non-existing element then exception is raised. **Required.** * **3**: ``updates`` tensor of rank ``r - indices.shape[-1] + q - 1`` of type *T*. If expected ``updates`` rank is 0D it can be a tensor with single element. **Required.** @@ -121,7 +122,7 @@ Example that shows update of two slices of ``4x4`` shape in ``data``, with *redu - 4 + 4 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/scaled-dot-product-attention.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/scaled-dot-product-attention.rst index 922048a4f18754..2d18b9d3a6488c 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/scaled-dot-product-attention.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/scaled-dot-product-attention.rst @@ -60,11 +60,11 @@ omitting training-related parameter. * **4**: ``attention_mask`` - two options available. ``attention_mask`` is ignored if ``causal`` is set to ``True``. **Optional.** - * at least 3 dimensional tensor of type *T* or ``boolean`` and shape ``[N, ..., L, S]``. + * at least 2 dimensional tensor of type *T* or ``boolean`` and shape numpy-broadcastable to ``[N, ..., L, S]``. See :doc:`Numpy Broadcast Rules <../../broadcast-rules>` for broadcast details.
* a scalar of type *T* with value ``0``. Scalar zero value signals that applying an attention mask is not necessary (similar to specifying attention_mask=None in the provided pseudo-code). -* **5**: ``scale`` a scalar tensor of type *T*, an alternative scale factor instead of 1/sqrt(query.shape[-1]) used by default in the pseudo-code above. **Optional.** +* **5**: ``scale`` a scalar or single element 1D tensor of type *T*, an alternative scale factor instead of 1/sqrt(query.shape[-1]) used by default in the pseudo-code above. **Optional.** **Outputs** @@ -78,7 +78,7 @@ omitting training-related parameter. **Dimensions** -* ``N, ...`` - one or more batch dimensions. Each batch dimension should be either constant across the input tensors (query, key, and value), indicating that they have the same batch size, or they should be broadcastable to the same value. +* ``N, ...`` - one or more batch dimensions. Each batch dimension should be either constant across the input tensors (query, key, and value), indicating that they have the same batch size, or they should be numpy-broadcastable to the same value. See :doc:`Numpy Broadcast Rules <../../broadcast-rules>` for broadcast details. * ``S`` - source sequence length @@ -192,29 +192,29 @@ Other batch dimensions ``...`` are optional. - 1 + 4 6 - 5 + 10 -1 80 - 2 (repeat 2 times) - 2 (repeat 3 times) - 2 (repeat 5 times) + 1 + 6 + 10 -1 80 - 4 - 3 - 10 + 1 + 1 + 1 -1 80 1 - 2 + 1 1 -1 -1 @@ -231,3 +231,42 @@ Other batch dimensions ``...`` are optional. + +*Example 5: With attention mask broadcasting* + +.. code-block:: xml + :force: + + + + + + + 2 + 16 + 80 + + + 2 + 32 + 80 + + + 2 + 32 + 80 + + + 2 + 1 + 1 + + + + + 2 + 16 + 80 + + + diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/shape/squeeze-1.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/shape/squeeze-1.rst index ae6db8fb5353a4..c6f163536476f2 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/shape/squeeze-1.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/shape/squeeze-1.rst @@ -20,7 +20,15 @@ Squeeze * With the second input provided, each value is an index of a dimension from the first tensor that is to be removed. Specified dimension should be equal to 1, otherwise it will be ignored and copied as is. Dimension indices can be specified directly, or by negative indices (counting dimensions from the end). -Note: Updated behavior since 2024.3, request of squeezing dimension not equal to 1 is expected to be ignored instead of causing an error. +.. note:: + + Behavior before 2024.3 OpenVINO release: Error is raised when dimension to squeeze is not compatible with 1. + +.. note:: + + - If index of the dimension to squeeze is provided as a constant input and it points to a dynamic dimension that might be `1`, then the dimension is considered as squeezable. Therefore the rank of the output shape will be reduced, but not dynamic. + - If the input with indices is empty or not provided, dynamic dimension compatible with `1` leads to dynamic rank of the output shape. + **Attributes**: *Squeeze* operation doesn't have attributes. 
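The axis-selection rules above can be sketched with the ``openvino.runtime`` Python API (an assumed setup: a standard OpenVINO Python installation; the shapes and axis values are arbitrary):

.. code-block:: py

    import numpy as np
    from openvino.runtime import PartialShape, opset1 as ops

    data = ops.parameter(PartialShape([2, 1, 3, 1]), dtype=np.float32, name="data")

    # Squeeze axis 1 (size 1): the dimension is removed -> [2, 3, 1].
    squeezed = ops.squeeze(data, ops.constant(np.array([1], dtype=np.int64)))
    print(squeezed.get_output_partial_shape(0))

    # Negative indices count from the end: axis -1 also has size 1 -> [2, 1, 3].
    squeezed_neg = ops.squeeze(data, ops.constant(np.array([-1], dtype=np.int64)))
    print(squeezed_neg.get_output_partial_shape(0))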
diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-offsets-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-offsets-15.rst new file mode 100644 index 00000000000000..9f0392c8e2d038 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-offsets-15.rst @@ -0,0 +1,184 @@ +.. {#openvino_docs_ops_sparse_EmbeddingBagOffsets_15} + +EmbeddingBagOffsets +====================== + + +.. meta:: + :description: Learn about EmbeddingBagOffsets-15 - a sparse operation, which + can be performed on three required and two optional input tensors. + +**Versioned name**: *EmbeddingBagOffsets-15* + +**Category**: *Sparse* + +**Short description**: Computes sums or means of "bags" of embeddings, without instantiating the intermediate embeddings. + +**Detailed description**: + +Operation EmbeddingBagOffsets is an implementation of ``torch.nn.EmbeddingBag`` with indices and offsets inputs being 1D tensors. + +For each index in ``indices`` this operator gathers values from ``emb_table`` embedding table. Then values at indices in the range of the same bag (based on ``offset`` input) are reduced according to ``reduction`` attribute. + +Values in ``offsets`` define starting index in ``indices`` tensor of each "bag", +e.g. ``offsets`` with value ``[0, 3, 4, 4, 6]`` define 5 "bags" containing ``[3, 1, 0, 2, num_indices-6]`` elements corresponding to ``[indices[0:3], indices[3:4], empty_bag, indices[4:6], indices[6:]]`` slices of indices per bag. + +EmbeddingBagOffsets is an equivalent to following NumPy snippet: + +.. code-block:: py + + def embedding_bag_offsets( + emb_table: np.ndarray, + indices: np.ndarray, + offsets: np.ndarray, + default_index: Optional[int] = None, + per_sample_weights: Optional[np.ndarray] = None, + reduction: Literal["sum", "mean"] = "sum", + ): + assert ( + reduction == "sum" or per_sample_weights is None + ), "Attribute per_sample_weights is only supported in sum reduction." + if per_sample_weights is None: + per_sample_weights = np.ones_like(indices) + embeddings = [] + for emb_idx, emb_weight in zip(indices, per_sample_weights): + embeddings.append(emb_table[emb_idx] * emb_weight) + previous_offset = offsets[0] + bags = [] + offsets = np.append(offsets, len(indices)) + for bag_offset in offsets[1:]: + bag_size = bag_offset - previous_offset + if bag_size != 0: + embedding_bag = embeddings[previous_offset:bag_offset] + reduced_bag = np.add.reduce(embedding_bag) + if reduction == "mean": + reduced_bag = reduced_bag / bag_size + bags.append(reduced_bag) + else: + # Empty bag case + if default_index is not None and default_index != -1: + bags.append(emb_table[default_index]) + else: + bags.append(np.zeros(emb_table.shape[1:])) + previous_offset = bag_offset + return np.stack(bags, axis=0) + + +**Attributes**: + +* *reduction* + + * **Description**: reduction mode. + * **Range of values**: + + * sum - compute weighted sum, using corresponding values of ``per_sample_weights`` as weights if provided. + * mean - compute average of values in bag. Input ``per_sample_weights`` is not supported and will raise exception. + + * **Type**: ``string`` + * **Default value**: sum + * **Required**: *no* + +**Inputs**: + +* **1**: ``emb_table`` tensor containing the embedding lookup table of the module of shape ``[num_emb, emb_dim1, emb_dim2, ...]`` and of type *T*. 
**Required.** +* **2**: ``indices`` tensor of shape ``[num_indices]`` and of type *T_IND*. **Required.** +* **3**: ``offsets`` tensor of shape ``[batch]`` and of type *T_IND* containing the starting index positions of each "bag" in ``indices``. Maximum value of offsets cannot be greater than length of ``indices``. **Required.** +* **4**: ``default_index`` scalar of type *T_IND* containing default index in embedding table to fill empty "bags". If set to ``-1`` or not provided, empty "bags" are filled with zeros. Reverse indexing using negative values is not supported. **Optional.** +* **5**: ``per_sample_weights`` tensor of the same shape as ``indices`` and of type *T*. Supported only when *reduction* attribute is set to ``"sum"``. Each value in this tensor are multiplied with each value pooled from embedding table for each index. Optional, default is tensor of ones. **Optional.** + +**Outputs**: + +* **1**: tensor of shape ``[batch, emb_dim1, emb_dim2, ...]`` and of type *T* containing embeddings for each bag. + +**Types** + +* *T*: any numeric type. +* *T_IND*: ``int32`` or ``int64``. + +**Example** + +*Example 1: per_sample_weights are provided, default_index is set to 0 to fill empty bag with values gathered form emb_table on given index.* + +.. code-block:: xml + + + + + + 5 + 2 + + + 4 + + + 3 + + + + 4 + + + + + 3 + 2 + + + + +*Example 2: per_sample_weights are provided, default_index is set to -1 to fill empty bag with 0.* + +.. code-block:: xml + + + + + + 5 + 2 + + + 4 + + + 3 + + + + 4 + + + + + 3 + 2 + + + + +*Example 3: Example of reduction set to mean.* + +.. code-block:: xml + + + + + + 5 + 2 + + + 4 + + + 3 + + + + + 3 + 2 + + + diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-offsets-sum-3.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-offsets-sum-3.rst index 0a0cb67afb0f06..c3eb163b16d98f 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-offsets-sum-3.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-offsets-sum-3.rst @@ -14,7 +14,48 @@ EmbeddingBagOffsetsSum **Short description**: Computes sums of "bags" of embeddings, without instantiating the intermediate embeddings. -**Detailed description**: This is the second case of the PyTorch `EmbeddingBag `__ , it has indices in two 1D tensors provided as 2nd and 3rd inputs. For each index in ``indices`` this operator gets values from ``data`` embedding table and sums all values belonging to each bag. Values in ``offsets`` define starting index in ``indices`` tensor of each "bag", e.g. ``offsets`` with value ``[0,3,4,4,6]`` define 5 "bags" containing ``[3,1,0,2,n-6]`` elements. +**Detailed description**: + +Operation EmbeddingBagOffsets is an implementation of ``torch.nn.EmbeddingBag`` with indices and offsets inputs being 1D tensors. + +For each index in ``indices`` this operator gathers values from ``emb_table`` embedding table. Then values at indices in the range of the same bag (based on ``offset`` input) are reduced according to ``reduction`` attribute. + +Values in ``offsets`` define starting index in ``indices`` tensor of each "bag", +e.g. ``offsets`` with value ``[0, 3, 4, 4, 6]`` define 5 "bags" containing ``[3, 1, 0, 2, num_indices-6]`` elements corresponding to ``[indices[0:3], indices[3:4], empty_bag, indices[4:6], indices[6:]]`` slices of indices per bag. 
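The bag partitioning described above can be reproduced with a few lines of plain NumPy (an illustrative sketch only; the index values are made up):

.. code-block:: py

    import numpy as np

    indices = np.array([0, 2, 3, 4, 4, 3, 1])   # num_indices = 7
    offsets = np.array([0, 3, 4, 4, 6])         # batch of 5 bags

    # Each bag spans indices[offsets[i]:offsets[i + 1]]; the last bag runs to the end.
    bounds = np.append(offsets, len(indices))
    bags = [indices[b:e] for b, e in zip(bounds[:-1], bounds[1:])]
    for bag in bags:
        print(bag)                              # [0 2 3], [4], [] (empty bag), [4 3], [1]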
+ +EmbeddingBagOffsetsSum is an equivalent to following NumPy snippet: + +.. code-block:: py + + def embedding_bag_offsets( + emb_table: np.ndarray, + indices: np.ndarray, + offsets: np.ndarray, + default_index: Optional[int] = None, + per_sample_weights: Optional[np.ndarray] = None, + ): + if per_sample_weights is None: + per_sample_weights = np.ones_like(indices) + embeddings = [] + for emb_idx, emb_weight in zip(indices, per_sample_weights): + embeddings.append(emb_table[emb_idx] * emb_weight) + previous_offset = offsets[0] + bags = [] + offsets = np.append(offsets, len(indices)) + for bag_offset in offsets[1:]: + bag_size = bag_offset - previous_offset + if bag_size != 0: + embedding_bag = embeddings[previous_offset:bag_offset] + reduced_bag = np.add.reduce(embedding_bag) + bags.append(reduced_bag) + else: + # Empty bag case + if default_index is not None and default_index != -1: + bags.append(emb_table[default_index]) + else: + bags.append(np.zeros(emb_table.shape[1:])) + previous_offset = bag_offset + return np.stack(bags, axis=0) **Attributes**: EmbeddingBagOffsetsSum operation has no attributes. @@ -22,9 +63,9 @@ EmbeddingBagOffsetsSum * **1**: ``emb_table`` tensor containing the embedding lookup table of the module of shape ``[num_emb, emb_dim1, emb_dim2, ...]`` and of type *T*. **Required.** * **2**: ``indices`` tensor of shape ``[num_indices]`` and of type *T_IND*. **Required.** -* **3**: ``offsets`` tensor of shape ``[batch]`` and of type *T_IND* containing the starting index positions of each "bag" in ``indices``. **Required.** -* **4**: ``default_index`` scalar of type *T_IND* containing default index in embedding table to fill empty "bags". If not provided empty "bags" are filled with zeros. **Optional.** -* **5**: ``per_sample_weights`` tensor of the same shape as ``indices`` and of type *T*. Each value in this tensor are multiplied with each value pooled from embedding table for each index. Optional, default is tensor of ones. +* **3**: ``offsets`` tensor of shape ``[batch]`` and of type *T_IND* containing the starting index positions of each "bag" in ``indices``. Maximum value of offsets cannot be greater than length of ``indices``. **Required.** +* **4**: ``default_index`` scalar of type *T_IND* containing default index in embedding table to fill empty "bags". If set to ``-1`` or not provided, empty "bags" are filled with zeros. Reverse indexing using negative values is not supported. **Optional.** +* **5**: ``per_sample_weights`` tensor of the same shape as ``indices`` and of type *T*. Each value in this tensor are multiplied with each value pooled from embedding table for each index. Optional, default is tensor of ones. **Optional.** **Outputs**: @@ -37,7 +78,9 @@ EmbeddingBagOffsetsSum **Example** -.. code-block:: cpp +*Example 1: per_sample_weights are provided, default_index is set to 0 to fill empty bag with values gathered form emb_table on given index.* + +.. code-block:: xml @@ -52,7 +95,7 @@ EmbeddingBagOffsetsSum 3 - + 4 @@ -64,4 +107,31 @@ EmbeddingBagOffsetsSum +*Example 2: per_sample_weights are provided, default_index is set to -1 to fill empty bag with 0.* + +.. 
code-block:: xml + + + + 5 + 2 + + + 4 + + + 3 + + + + 4 + + + + + 3 + 2 + + + diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-packed-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-packed-15.rst new file mode 100644 index 00000000000000..2892d49759f667 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-packed-15.rst @@ -0,0 +1,131 @@ +.. {#openvino_docs_ops_sparse_EmbeddingBagPacked_15} + +EmbeddingBagPacked +===================== + + +.. meta:: + :description: Learn about EmbeddingBagPacked-15 - a sparse operation, which + can be performed on two required and one optional input tensor. + +**Versioned name**: *EmbeddingBagPacked-15* + +**Category**: *Sparse* + +**Short description**: Computes sums or means of "bags" of embeddings, without instantiating the intermediate embeddings. + +**Detailed description**: + +Operation EmbeddingBagPacked is an implementation of ``torch.nn.EmbeddingBag`` with indices input being 2D tensor of shape ``[batch, indices_per_bag]``. +Operation is equivalent to *gather_op = Gather(emb_table, indices, axis=0)* followed by reduction: + + * *sum* - *ReduceSum(Multiply(gather_op, Unsqueeze(per_sample_weights, -1)), axis=1)*, + * *mean* - *ReduceMean(gather_op, axis=1)*. + +**Attributes**: + +* *reduction* + + * **Description**: reduction mode. + * **Range of values**: + + * sum - compute weighted sum, using corresponding values of ``per_sample_weights`` as weights if provided. + * mean - compute average of values in bag. Input ``per_sample_weights`` is not supported and will raise exception. + + * **Type**: ``string`` + * **Default value**: sum + * **Required**: *no* + +**Inputs**: + +* **1**: ``emb_table`` tensor containing the embedding lookup table of the module of shape ``[num_emb, emb_dim1, emb_dim2, ...]`` and of type *T*. **Required.** +* **2**: ``indices`` tensor of shape ``[batch, indices_per_bag]`` and of type *T_IND*. **Required.** +* **3**: ``per_sample_weights`` tensor of the same shape as ``indices`` and of type *T* supported only in ``sum`` mode. Each value in this tensor are multiplied with each value pooled from embedding table for each index. Optional, default is tensor of ones. **Optional.** + +**Outputs**: + +* **1**: tensor of shape ``[batch, emb_dim1, emb_dim2, ...]`` and of type *T* containing embeddings for each bag. + +**Types** + +* *T*: any numeric type. +* *T_IND*: ``int32`` or ``int64``. + +**Example** + +*Example 1: reduction set to sum, per_sample_weights are not provided.* + +.. code-block:: xml + + + + + + 5 + 2 + + + 3 + 2 + + + + + 3 + 2 + + + + +*Example 2: reduction set to sum and per_sample_weights are provided.* + +.. code-block:: xml + + + + + + 5 + 2 + + + 3 + 2 + + + 3 + 2 + + + + + 3 + 2 + + + + +*Example 3: reduction set to mean, per_sample_weights are not provided.* + +.. 
code-block:: xml + + + + + + 5 + 2 + + + 3 + 2 + + + + + 3 + 2 + + + + diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-packed-sum-3.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-packed-sum-3.rst index 9ef623ca7755eb..b6cad12be869ac 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-packed-sum-3.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sparse/embedding-bag-packed-sum-3.rst @@ -14,7 +14,10 @@ EmbeddingBagPackedSum **Short description**: Computes sums of "bags" of embeddings, without instantiating the intermediate embeddings. -**Detailed description**: This is the first case of the PyTorch `EmbeddingBag `__ , it has indices in the tensor of format ``[batch, indices_per_bag]``. If 3rd input is not provided, this operation is equivalent to *Gather* followed by *ReduceSum(axis=0)*. However, *EmbeddingBagPackedSum* is much more time and memory efficient than using a chain of these operations. +**Detailed description**: + +Operation EmbeddingBagPackedSum is an implementation of ``torch.nn.EmbeddingBag`` in ``sum`` mode, which indices input being 2D tensor of shape ``[batch, indices_per_bag]``. +Operation is equivalent to *ReduceSum(Multiply(Gather(emb_table, indices, axis=0), Unsqueeze(per_sample_weights, -1)), axis=1)*. **Attributes**: EmbeddingBagPackedSum operation has no attributes. @@ -35,7 +38,7 @@ EmbeddingBagPackedSum **Example** -.. code-block:: cpp +.. code-block:: xml @@ -47,13 +50,13 @@ EmbeddingBagPackedSum 3 2 - + 3 2 - + 3 2 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/type/string-tensor-pack-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/type/string-tensor-pack-15.rst new file mode 100644 index 00000000000000..77f44f8d687741 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/type/string-tensor-pack-15.rst @@ -0,0 +1,147 @@ +.. {#openvino_docs_ops_type_StringTensorPack_15} + +StringTensorPack +=================== + + +.. meta:: + :description: Learn about StringTensorPack-15 - operation which packs a concatenated batch of strings into a batched string tensor. + +**Versioned name**: *StringTensorPack-15* + +**Category**: *Type* + +**Short description**: *StringTensorPack* transforms a concatenated strings data (encoded as 1D tensor of u8 element type) into +a string tensor using *begins* and *ends* indices. + +**Detailed description** + +Consider inputs: + +* *begins* = [0, 5] +* *ends* = [5, 13] +* *symbols* = "IntelOpenVINO" + +*StringTensorPack* uses indices from ``begins`` and ``ends`` to transform concatenated string ``symbols`` into ``output``, +a string tensor. The ``output.shape`` is equal to ``begins.shape`` and ``ends.shape``, +and in this case ``output`` holds values ``["Intel", "OpenVINO"]``. + +When defining *begins* and *ends*, the notation ``[a, b)`` is used. This means that the range starts with ``a`` and includes all values up to, +but not including, ``b``. That is why in the example given the length of "IntelOpenVINO" is 12, but *ends* vector contains 13. The shapes of ``begins`` and ``ends`` are required to be equal. + +**Inputs** + +* **1**: ``begins`` - ND tensor of non-negative integer numbers of type *T_IDX*, containing indices of each string's beginnings. 
**Required.** + +* **2**: ``ends`` - ND tensor of non-negative integer numbers of type *T_IDX*, containing indices of each string's endings. **Required.** + +* **3**: ``symbols`` - 1D tensor of concatenated strings data encoded in utf-8 bytes, of type *u8*. **Required.** + +**Outputs** + +* **1**: ``output`` - ND string tensor of the same shape as *begins* and *ends*. + +**Types** + +* *T_IDX*: ``int32`` or ``int64``. + +**Examples** + +*Example 1: 1D begins and ends* + +.. code-block:: xml + :force: + + + + + 2 + + + 2 + + + 13 + + + + + 2 + + + + +*Example 2: input with an empty string* + +.. code-block:: xml + :force: + + + + + 2 + + + 2 + + + 13 + + + + + 5 + + + + +*Example 3: skipped symbols* + +.. code-block:: xml + :force: + + + + + 2 + + + 2 + + + 13 + + + + + 5 + + + + +*Example 4: 2D begins and ends* + +.. code-block:: xml + :force: + + + + + 2 + 2 + + + 2 + 2 + + + 21 + + + + + 2 + 2 + + + diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/type/string-tensor-unpack-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/type/string-tensor-unpack-15.rst new file mode 100644 index 00000000000000..8ae8a8f8234b21 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/type/string-tensor-unpack-15.rst @@ -0,0 +1,133 @@ +.. {#openvino_docs_ops_type_StringTensorUnpack_15} + +StringTensorUnpack +=================== + + +.. meta:: + :description: Learn about StringTensorUnpack-15 - operation which unpacks a batch of strings into three tensors. + +**Versioned name**: *StringTensorUnpack-15* + +**Category**: *Type* + +**Short description**: *StringTensorUnpack* operation transforms a given batch of strings into three tensors - two storing begin +and end indices of the strings and another containing the concatenated string data, respectively. + +**Detailed description** + +Consider an ``input`` string tensor containing values ``["Intel", "OpenVINO"]``. + +The operator will transform the tensor into three outputs: + +* *begins* = [0, 5] + * ``begins[0]`` is equal to 0, because the first string starts at the beginning index. + * ``begins[1]`` is equal to 5, because length of the string "Intel" is equal to 5. + * ``begins.shape`` is equal to [2], because the ``input`` is a batch of 2 strings. + +* *ends* = [5, 13] + * ``ends[0]`` is equal to 5, because length of the string "Intel" is equal to 5. + * ``ends[1]`` is equal to 13, because length of the string "OpenVINO" is 8, and it needs to be summed up with length of the string "Intel". + * ``ends.shape`` is equal to ``[2]``, because the ``input`` is a batch of 2 strings. + +* *symbols* = "IntelOpenVINO" + * ``symbols`` contains concatenated string data encoded in utf-8 bytes, interpretable using ``begins`` and ``ends``. + * ``symbols.shape`` is equal to ``[13]``, because it's the length of concatenated ``input`` strings. + +When defining *begins* and *ends*, the notation ``[a, b)`` is used. This means that the range starts with ``a`` and includes all values up to, +but not including, ``b``. That is why in the example given the length of "IntelOpenVINO" is 12, but *ends* vector contains 13. + +**Inputs** + +* **1**: ``data`` - ND tensor of type *string*. **Required.** + +**Outputs** + +* **1**: ``begins`` - ND tensor of non-negative integer numbers of type *int32* and of the same shape as ``data`` input. + +* **2**: ``ends`` - ND tensor of non-negative integer numbers of type *int32* and of the same shape as ``data`` input. 
+ +* **3**: ``symbols`` - 1D tensor of concatenated strings data encoded in utf-8 bytes, of type *u8* and size equal to the sum of the lengths of each string from the ``data`` input. + +**Examples** + +*Example 1: 1D input* + +For ``input = ["Intel", "OpenVINO"]`` + +.. code-block:: xml + :force: + + + + + 2 + + + + + 2 + + + 2 + + + 13 + + + + +*Example 2: input with an empty string* + +For ``input = ["OMZ", "", "GenAI", " ", "2024"]`` + +.. code-block:: xml + :force: + + + + + 5 + + + + + 2 + + + 2 + + + 13 + + + + +*Example 3: 2D input* + +For ``input = [["Intel", "OpenVINO"], ["OMZ", "GenAI"]]`` + +.. code-block:: xml + :force: + + + + + 2 + 2 + + + + + 2 + 2 + + + 2 + 2 + + + 21 + + + diff --git a/docs/articles_en/get-started.rst b/docs/articles_en/get-started.rst index 3c62dec7f261c2..f8a11a9ef9bf5b 100644 --- a/docs/articles_en/get-started.rst +++ b/docs/articles_en/get-started.rst @@ -113,8 +113,7 @@ Automated Device Configuration OpenVINO’s hardware device configuration options enable you to write an application once and deploy it anywhere with optimal performance. -* Increase application portability with :doc:`Automatic Device Selection (AUTO) ` -* Perform parallel inference across processors with :doc:`Multi-Device Execution (MULTI) ` +* Increase application portability and perform parallel inference across processors with :doc:`Automatic Device Selection (AUTO) ` * Efficiently split inference between hardware cores with :doc:`Heterogeneous Execution (HETERO) ` Flexible Model and Pipeline Configuration diff --git a/docs/articles_en/get-started/install-openvino.rst b/docs/articles_en/get-started/install-openvino.rst index f8fefc4abacece..6bc7ebeecbe7ad 100644 --- a/docs/articles_en/get-started/install-openvino.rst +++ b/docs/articles_en/get-started/install-openvino.rst @@ -47,7 +47,7 @@ All currently supported versions are: NPU V\* V\* V\ * n/a n/a n/a n/a V\* =============== ========== ====== =============== ======== ============ ========== ========== ========== - | \* **Of the Linux systems, version 22.04 includes drivers for NPU.** + | \* **Of the Linux systems, versions 22.04 and 24.04 include drivers for NPU.** | **For Windows, CPU inference on ARM64 is not supported.** .. dropdown:: Effortless GenAI integration with OpenVINO GenAI Flavor diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst index 2840e0be3b33b2..c64c27678b4fa4 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst @@ -27,7 +27,6 @@ Install OpenVINO™ Runtime on Linux from an Archive File CentOS7 x86_64 V V n/a Ubuntu20 x86_64 V V V Ubuntu22 x86_64 V V V - Ubuntu24 x86_64 V V V RHEL8 x86_64 V V n/a =================== ===== ===== ===== diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst index c9e32e907837b0..2c53e32481c00e 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst @@ -85,8 +85,7 @@ Step 1: Download and Install OpenVINO Core Components .. 
code-block:: sh cd /Downloads - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.2/windows/w_openvino_toolkit_windows_2024.2.0.15519.5c0f38f83f6_x86_64.zip --output openvino_2024.2.0.zip --output openvino_2024.2.0.zip - + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.2/windows/w_openvino_toolkit_windows_2024.2.0.15519.5c0f38f83f6_x86_64.zip --output openvino_2024.2.0.zip .. note:: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst index d5461348e35112..28fd0e8dfd1903 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst @@ -71,6 +71,48 @@ need to install additional components. Check the description below, as well as t :doc:`list of additional configurations <../configurations>` to see if your case needs any of them. +Installing specific components of OpenVINO from Conda Forge ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +You do not have to install the entire OpenVINO package. You can install selected +components by using: + +.. code-block:: sh + + conda install conda-forge:: + +```` may be one of the components of OpenVINO listed below: + +- ``libopenvino-auto-batch-plugin`` +- ``libopenvino-auto-plugin`` +- ``libopenvino-hetero-plugin`` +- ``libopenvino-intel-cpu-plugin`` +- ``libopenvino-intel-gpu-plugin`` +- ``libopenvino-intel-npu-plugin`` +- ``libopenvino-ir-frontend`` +- ``libopenvino-onnx-frontend`` +- ``libopenvino-paddle-frontend`` +- ``libopenvino-pytorch-frontend`` +- ``libopenvino-tensorflow-frontend`` +- ``libopenvino-tensorflow-lite-frontend`` +- ``libopenvino-dev`` +- ``libopenvino-python`` +- ``libopenvino-arm-cpu-plugin`` + + +For example, to install a single component, use: + +.. code-block:: sh + + conda install conda-forge::libopenvino-intel-cpu-plugin + +For multiple components, use: + +.. code-block:: sh + + conda install conda-forge::libopenvino-intel-cpu-plugin conda-forge::libopenvino-arm-cpu-plugin conda-forge::libopenvino-intel-npu-plugin conda-forge::libopenvino-intel-gpu-plugin + + Compiling with OpenVINO Runtime from Conda-Forge on Linux +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -110,13 +152,19 @@ OpenCL™ Driver is included with the Intel® Graphics Driver package. Uninstalling OpenVINO™ Runtime ########################################################### -Once OpenVINO Runtime is installed via Conda, you can remove it using the following command, +Once OpenVINO Runtime is installed via Conda, you can remove it, using the following command, with the proper OpenVINO version number: .. code-block:: sh conda remove openvino=2024.2.0 +If you have installed specific components of OpenVINO, you can remove them, using: + +.. code-block:: sh + + conda remove conda-forge:: + What's Next? 
############################################################ diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst b/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst index afa00cd6551e85..0ff1b95c8eb212 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst @@ -66,8 +66,6 @@ Step 1: Set up the environment CORE_IMAGE_EXTRA_INSTALL:append = " openvino-inference-engine-samples" # Include OpenVINO Python API package in the target image. CORE_IMAGE_EXTRA_INSTALL:append = " openvino-inference-engine-python3" - # Include model conversion API in the target image. - CORE_IMAGE_EXTRA_INSTALL:append = " openvino-model-optimizer" Step 2: Build a Yocto Image with OpenVINO Packages ################################################## @@ -102,9 +100,6 @@ If the image build is successful, it will return the list of packages as below: openvino-inference-engine-python3 openvino-inference-engine-samples openvino-inference-engine-src - openvino-model-optimizer - openvino-model-optimizer-dbg - openvino-model-optimizer-dev Additional Resources #################### diff --git a/docs/articles_en/learn-openvino/llm_inference_guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide.rst index 230dd5dd8bbd9a..6db776a3c1f5fb 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide.rst @@ -26,40 +26,35 @@ conversion to advanced use cases. The advantages of using OpenVINO for LLM deployment: -* **OpenVINO offers optimized LLM inference**; provides a full C/C++ API, leading to - faster operation than Python-based runtimes; includes a Python API for rapid - development, with the option for further optimization in C++. - -* **Compatible with diverse hardware**, supports CPUs, GPUs, and neural accelerators - across ARM and x86/x64 architectures, integrated Intel® Processor Graphics, discrete - Intel® Arc™ A-Series Graphics, and discrete Intel® Data Center GPU Flex Series; - features automated optimization to maximize performance on target hardware. - -* **Requires fewer dependencies** than frameworks like Hugging Face and PyTorch, - resulting in a smaller binary size and reduced memory footprint, making deployments - easier and updates more manageable. - -* **Provides compression and precision management techniques** such as 8-bit and 4-bit - weight compression, including embedding layers, and storage format reduction. This - includes fp16 precision for non-compressed models and int8/int4 for compressed models, - like GPTQ models from `Hugging Face `__. - -* **Supports a wide range of deep learning models and architectures** including text, - image, and audio generative models like Llama 2, MPT, OPT, Stable Diffusion, Stable - Diffusion XL. This enables the development of multimodal applications, allowing for +* **OpenVINO offers optimized LLM inference**: + provides a full C/C++ API, leading to faster operation than Python-based runtimes; includes a + Python API for rapid development, with the option for further optimization in C++. +* **Compatible with diverse hardware**: + supports CPUs, GPUs, and neural accelerators across ARM and x86/x64 architectures, integrated + Intel® Processor Graphics, discrete Intel® Arc™ A-Series Graphics, and discrete Intel® Data + Center GPU Flex Series; features automated optimization to maximize performance on target + hardware. 
+* **Requires fewer dependencies**: + than frameworks like Hugging Face and PyTorch, resulting in a smaller binary size and reduced + memory footprint, making deployments easier and updates more manageable. +* **Provides compression and precision management techniques**: + such as 8-bit and 4-bit weight compression, including embedding layers, and storage format + reduction. This includes fp16 precision for non-compressed models and int8/int4 for compressed + models, like GPTQ models from `Hugging Face `__. +* **Supports a wide range of deep learning models and architectures**: + including text, image, and audio generative models like Llama 2, MPT, OPT, Stable Diffusion, + Stable Diffusion XL. This enables the development of multimodal applications, allowing for write-once, deploy-anywhere capabilities. - -* **Enhances inference capabilities**: fused inference primitives such as Scaled Dot - Product Attention, Rotary Positional Embedding, Group Query Attention, and Mixture - of Experts. It also offers advanced features like in-place KV-cache, dynamic - quantization, KV-cache quantization and encapsulation, dynamic beam size +* **Enhances inference capabilities**: + fused inference primitives such as Scaled Dot Product Attention, Rotary Positional Embedding, + Group Query Attention, and Mixture of Experts. It also offers advanced features like in-place + KV-cache, dynamic quantization, KV-cache quantization and encapsulation, dynamic beam size configuration, and speculative sampling. - -* **Provides stateful model optimization**: models from the Hugging Face Transformers - are converted into a stateful form, optimizing inference performance and memory usage - in long-running text generation tasks by managing past KV-cache tensors more - efficiently internally. This feature is automatically activated for many supported - models, while unsupported ones remain stateless. Learn more about the +* **Provides stateful model optimization**: + models from the Hugging Face Transformers are converted into a stateful form, optimizing + inference performance and memory usage in long-running text generation tasks by managing past + KV-cache tensors more efficiently internally. This feature is automatically activated for many + supported models, while unsupported ones remain stateless. Learn more about the :doc:`Stateful models and State API <../openvino-workflow/running-inference/stateful-models>`. OpenVINO offers three main paths for Generative AI use cases: diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index 821f16062c271b..79c3471f3ab783 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -30,7 +30,8 @@ will not work with these instructions, make sure to .. code-block:: python - optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format int4 --trust-remote-code + optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format int4 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0" + 2. 
Perform generation using the new GenAI API: @@ -54,9 +55,10 @@ will not work with these instructions, make sure to #include int main(int argc, char* argv[]) { - std::string model_path = argv[1]; - ov::genai::LLMPipeline pipe(model_path, "CPU");//target device is CPU - std::cout << pipe.generate("The Sun is yellow because"); //input context + std::string model_path = argv[1]; + ov::genai::LLMPipeline pipe(model_path, "CPU"); + std::cout << pipe.generate("The Sun is yellow because"); + } The `LLMPipeline` is the main object used for decoding. You can construct it directly from the folder with the converted model. It will automatically load the main model, tokenizer, detokenizer, @@ -74,6 +76,17 @@ below, where a lambda function outputs words to the console immediately upon gen .. tab-set:: + .. tab-item:: Python + :sync: py + + .. code-block:: python + + import openvino_genai as ov_genai + pipe = ov_genai.LLMPipeline(model_path, "CPU") + + streamer = lambda x: print(x, end='', flush=True) + pipe.generate("The Sun is yellow because", streamer=streamer) + .. tab-item:: C++ .. code-block:: cpp @@ -85,14 +98,43 @@ below, where a lambda function outputs words to the console immediately upon gen std::string model_path = argv[1]; ov::genai::LLMPipeline pipe(model_path, "CPU"); - auto streamer = [](std::string word) { std::cout << word << std::flush; }; - std::cout << pipe.generate("The Sun is yellow because", streamer); + auto streamer = [](std::string word) { + std::cout << word << std::flush; + // Return flag indicating whether generation should be stopped. + // false means continue generation. + return false; + }; + pipe.generate("The Sun is yellow because", ov::genai::streamer(streamer)); } You can also create your custom streamer for more sophisticated processing: .. tab-set:: + .. tab-item:: Python + :sync: py + + .. code-block:: python + + import openvino_genai as ov_genai + + class CustomStreamer(ov_genai.StreamerBase): + def __init__(self, tokenizer): + ov_genai.StreamerBase.__init__(self) + self.tokenizer = tokenizer + def put(self, token_id) -> bool: + # Decode tokens and process them. + # Streamer returns a flag indicating whether generation should be stopped. + # In Python, `return` can be omitted. In that case, the function will return None + # which will be converted to False, meaning that generation should continue. + # return stop_flag + def end(self): + # Decode tokens and process them. + + pipe = ov_genai.LLMPipeline(model_path, "CPU") + pipe.generate("The Sun is yellow because", streamer=CustomStreamer()) + + .. tab-item:: C++ .. code-block:: cpp @@ -101,20 +143,31 @@ You can also create your custom streamer for more sophisticated processing: class CustomStreamer: publict StreamerBase { public: - void put(int64_t token) {/* decode tokens and do process them*/}; - - void end() {/* decode tokens and do process them*/}; + bool put(int64_t token) { + bool stop_flag = false; + /* + custom decoding/tokens processing code + tokens_cache.push_back(token); + std::string text = m_tokenizer.decode(tokens_cache); + ... + */ + return stop_flag; // Flag indicating whether generation should be stopped. If True, generation stops. 
+ }; + + void end() { + /* custom finalization */ + }; }; int main(int argc, char* argv[]) { - CustomStreamer custom_streamer; + auto custom_streamer = std::make_shared<CustomStreamer>(); std::string model_path = argv[1]; - ov::LLMPipeline pipe(model_path, "CPU"); - cout << pipe.generate("The Sun is yellow because", custom_streamer); + ov::genai::LLMPipeline pipe(model_path, "CPU"); + pipe.generate("The Sun is yellow because", ov::genai::streamer(custom_streamer)); } -Optimizing the Chat Scenario +Using GenAI in Chat Scenario ################################ For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache across inputs @@ -131,16 +184,15 @@ mark a conversation session, as you can see in these simple examples: import openvino_genai as ov_genai pipe = ov_genai.LLMPipeline(model_path) - config = {'num_groups': 3, 'group_size': 5, 'diversity_penalty': 1.1} - pipe.set_generation_cofnig(config) + pipe.set_generation_config({'max_new_tokens': 100}) pipe.start_chat() while True: -     print('question:') -     prompt = input() + print('question:') + prompt = input() if prompt == 'Stop!': -         break -     print(pipe(prompt)) + break + print(pipe.generate(prompt)) pipe.finish_chat() @@ -153,14 +205,18 @@ mark a conversation session, as you can see in these simple examples: std::string prompt; std::string model_path = argv[1]; - ov::LLMPipeline pipe(model_path, "CPU"); + ov::genai::LLMPipeline pipe(model_path, "CPU"); + + ov::genai::GenerationConfig config = pipe.get_generation_config(); + config.max_new_tokens = 100; + pipe.set_generation_config(config); pipe.start_chat(); for (size_t i = 0; i < questions.size(); i++) { std::cout << "question:\n"; std::getline(std::cin, prompt); - std::cout << pipe(prompt) << std::endl>>; + std::cout << pipe.generate(prompt) << std::endl; } pipe.finish_chat(); } @@ -171,60 +227,43 @@ Optimizing Generation with Grouped Beam Search Leverage grouped beam search decoding and configure generation_config for better text generation quality and efficient batch processing in GenAI applications. -Use grouped beam search decoding: +Specify generation_config to use grouped beam search: .. tab-set:: - .. tab-item:: C++ - - .. code-block:: cpp - - int main(int argc, char* argv[]) { - std::string model_path = argv[1]; - ov::LLMPipeline pipe(model_path, "CPU"); - ov::GenerationConfig config = pipe.get_generation_config(); - config.max_new_tokens = 256; - config.num_groups = 3; - config.group_size = 5; - config.diversity_penalty = 1.0f; + .. tab-item:: Python + :sync: py - cout << pipe.generate("The Sun is yellow because", config); - } + .. code-block:: python -Specify generation_config to use grouped beam search: + import openvino_genai as ov_genai + pipe = ov_genai.LLMPipeline(model_path, "CPU") + config = pipe.get_generation_config() + config.max_new_tokens = 256 + config.num_beam_groups = 3 + config.num_beams = 15 + config.diversity_penalty = 1.0 + pipe.generate("The Sun is yellow because", config) -.. tab-set:: .. tab-item:: C++ + :sync: cpp ..
code-block:: cpp int main(int argc, char* argv[]) { - std::string prompt; - std::string model_path = argv[1]; - ov::LLMPipeline pipe(model_path, "CPU"); - - ov::GenerationConfig config = pipe.get_generation_config(); + ov::genai::LLMPipeline pipe(model_path, "CPU"); + ov::genai::GenerationConfig config = pipe.get_generation_config(); config.max_new_tokens = 256; - config.num_groups = 3; - config.group_size = 5; + config.num_beam_groups = 3; + config.num_beams = 15; config.diversity_penalty = 1.0f; - auto streamer = [](std::string word) { std::cout << word << std::flush; }; - - pipe.start_chat(); - for (size_t i = 0; i < questions.size(); i++) { - - std::cout << "question:\n"; - cout << prompt << endl; - - auto answer = pipe(prompt, config, streamer); - // no need to print answer, streamer will do that - } - pipe.finish_chat(); + cout << pipe.generate("The Sun is yellow because", config); } + Comparing with Hugging Face Results ####################################### @@ -237,6 +276,7 @@ Compare and analyze results with those generated by Hugging Face models. .. code-block:: python from transformers import AutoTokenizer, AutoModelForCausalLM + import openvino_genai as ov_genai tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") @@ -249,12 +289,8 @@ Compare and analyze results with those generated by Hugging Face models. hf_output = tokenizer.decode(hf_encoded_output[0, encoded_prompt.shape[1]:]) print(f'hf_output: {hf_output}') - import sys - sys.path.append('build-Debug/') - import py_generate_pipeline as genai # set more friendly module name - - pipe = genai.LLMPipeline('text_generation/causal_lm/TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/') - ov_output = pipe(prompt, max_new_tokens=max_new_tokens) + pipe = ov_genai.LLMPipeline('TinyLlama-1.1B-Chat-v1.0') + ov_output = pipe.generate(prompt, max_new_tokens=max_new_tokens) print(f'ov_output: {ov_output}') assert hf_output == ov_output diff --git a/docs/articles_en/openvino-workflow/deployment-locally.rst b/docs/articles_en/openvino-workflow/deployment-locally.rst index 53eb9a8829eb2f..a8cdd8949fb318 100644 --- a/docs/articles_en/openvino-workflow/deployment-locally.rst +++ b/docs/articles_en/openvino-workflow/deployment-locally.rst @@ -65,7 +65,7 @@ The granularity of OpenVINO packages may vary for different distribution types. - The main library ``openvino`` is used by users' C++ applications to link against with. For C language applications, ``openvino_c`` is additionally required for distribution. The library includes OpenVINO API 2.0. -- The "optional" plugin libraries like ``openvino_intel_cpu_plugin`` (matching the ``openvino_.+_plugin`` pattern) are used to provide inference capabilities on specific devices or additional capabilities like :doc:`Hetero Execution ` and :doc:`Multi-Device Execution `. +- The "optional" plugin libraries like ``openvino_intel_cpu_plugin`` (matching the ``openvino_.+_plugin`` pattern) are used to provide inference capabilities on specific devices or additional capabilities like :doc:`Hetero Execution `. - The "optional" plugin libraries like ``openvino_ir_frontend`` (matching ``openvino_.+_frontend``) are used to provide capabilities to read models of different file formats such as OpenVINO IR, TensorFlow, ONNX, and PaddlePaddle. 
Here the term "optional" means that if the application does not use the capability enabled by the plugin, the plugin library or a package with the plugin is not needed in the final distribution. diff --git a/docs/articles_en/openvino-workflow/deployment-locally/local-distribution-libraries.rst b/docs/articles_en/openvino-workflow/deployment-locally/local-distribution-libraries.rst index 9889f15c0ecbd9..629b6646a7a80e 100644 --- a/docs/articles_en/openvino-workflow/deployment-locally/local-distribution-libraries.rst +++ b/docs/articles_en/openvino-workflow/deployment-locally/local-distribution-libraries.rst @@ -10,20 +10,34 @@ Libraries for Local Distribution needed to deploy the application. -With local distribution, each C or C++ application/installer has its own copies of OpenVINO Runtime binaries. However, OpenVINO has a scalable plugin-based architecture, which means that some components can be loaded in runtime only when they are really needed. This guide helps you understand what minimal set of libraries is required to deploy the application. - -Local distribution is also suitable for OpenVINO binaries built from source using `Build instructions `__, -but this guide assumes that OpenVINO Runtime is built dynamically. For `Static OpenVINO Runtime `__, select the required OpenVINO capabilities at the CMake configuration stage using `CMake Options for Custom Compilation `__, then build and link the OpenVINO components to the final application. +With local distribution, each C or C++ application/installer has its own copies of OpenVINO Runtime binaries. +However, OpenVINO has a scalable plugin-based architecture, which means that some components +can be loaded in runtime only when they are really needed. This guide helps you understand +what minimal set of libraries is required to deploy the application. + +Local distribution is also suitable for OpenVINO binaries built from source using +`Build instructions `__, +but this guide assumes that OpenVINO Runtime is built dynamically. +For `Static OpenVINO Runtime `__, +select the required OpenVINO capabilities at the CMake configuration stage using +`CMake Options for Custom Compilation `__, +then build and link the OpenVINO components to the final application. .. note:: - The steps below are independent of the operating system and refer to the library file name without any prefixes (like ``lib`` on Unix systems) or suffixes (like ``.dll`` on Windows OS). Do not put ``.lib`` files on Windows OS to the distribution because such files are needed only at a linker stage. + The steps below are independent of the operating system and refer to the library file name + without any prefixes (like ``lib`` on Unix systems) or suffixes (like ``.dll`` on Windows OS). + Do not put ``.lib`` files on Windows OS to the distribution because such files are needed + only at a linker stage. Library Requirements for C++ and C Languages ############################################ -Regardless of the programming language of an application, the ``openvino`` library must always be included in its final distribution. This core library manages all inference and frontend plugins. The ``openvino`` library depends on the TBB libraries which are used by OpenVINO Runtime to optimally saturate devices with computations. +Regardless of the programming language of an application, the ``openvino`` library must always +be included in its final distribution. This core library manages all inference and frontend plugins. 
+The ``openvino`` library depends on the TBB libraries which are used by OpenVINO Runtime +to optimally saturate devices with computations. If your application is in C language, you need to additionally include the ``openvino_c`` library. @@ -48,7 +62,9 @@ For each inference device, OpenVINO Runtime has its own plugin library: Depending on which devices are used in the app, the corresponding libraries should be included in the distribution package. -As shown in the picture above, some plugin libraries may have OS-specific dependencies which are either backend libraries or additional supports files with firmware, etc. Refer to the table below for details: +As shown in the picture above, some plugin libraries may have OS-specific dependencies +which are either backend libraries or additional supports files with firmware, etc. +Refer to the table below for details: .. tab-set:: @@ -111,10 +127,11 @@ As shown in the picture above, some plugin libraries may have OS-specific depend Libraries for Execution Modes +++++++++++++++++++++++++++++ -The ``HETERO``, ``MULTI``, ``BATCH``, and ``AUTO`` execution modes can also be used by the application explicitly or implicitly. +The ``HETERO``, ``BATCH``, and ``AUTO`` execution modes can also be used by the application explicitly or implicitly. Use the following recommendation scheme to decide whether to add the appropriate libraries to the distribution package: -- If :doc:`AUTO <../running-inference/inference-devices-and-modes/auto-device-selection>` is used explicitly in the application or ov::Core::compile_model is used without specifying a device, put ``openvino_auto_plugin`` to the distribution. +- If :doc:`AUTO <../running-inference/inference-devices-and-modes/auto-device-selection>` is used + explicitly in the application or ``ov::Core::compile_model`` is used without specifying a device, put ``openvino_auto_plugin`` to the distribution. .. note:: @@ -122,9 +139,10 @@ Use the following recommendation scheme to decide whether to add the appropriate If you are not sure which inference devices are available on the target system, put all inference plugin libraries in the distribution. If ov::device::priorities is used for `AUTO` to specify a limited device list, grab the corresponding device plugins only. -- If :doc:`MULTI <../running-inference/inference-devices-and-modes/multi-device>` is used explicitly, put ``openvino_auto_plugin`` in the distribution. -- If :doc:`HETERO <../running-inference/inference-devices-and-modes/hetero-execution>` is either used explicitly or ov::hint::performance_mode is used with GPU, put ``openvino_hetero_plugin`` in the distribution. -- If :doc:`BATCH <../running-inference/inference-devices-and-modes/automatic-batching>` is either used explicitly or ``ov::hint::performance_mode`` is used with GPU, put ``openvino_batch_plugin`` in the distribution. +- If :doc:`HETERO <../running-inference/inference-devices-and-modes/hetero-execution>` is either + used explicitly or ``ov::hint::performance_mode`` is used with GPU, put ``openvino_hetero_plugin`` in the distribution. +- If :doc:`BATCH <../running-inference/inference-devices-and-modes/automatic-batching>` is either + used explicitly or ``ov::hint::performance_mode`` is used with GPU, put ``openvino_batch_plugin`` in the distribution. 
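As a rough illustration of the selection rules above, the sketch below compiles the same model with an explicit device, with no device (which implies AUTO), and with HETERO; the model path is a placeholder, and the device names assume the corresponding plugins and hardware are present on the target system.

.. code-block:: python

   import openvino as ov

   core = ov.Core()
   model = core.read_model("model.xml")  # placeholder path

   # Explicit device: only openvino_intel_cpu_plugin is needed for inference.
   compiled_cpu = core.compile_model(model, "CPU")

   # No device specified: AUTO is used implicitly, so openvino_auto_plugin
   # must also be shipped.
   compiled_auto = core.compile_model(model)

   # HETERO splits the model between devices, so openvino_hetero_plugin
   # plus the GPU and CPU plugins are required.
   compiled_hetero = core.compile_model(model, "HETERO:GPU,CPU")
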
Frontend Libraries for Reading Models +++++++++++++++++++++++++++++++++++++ @@ -138,46 +156,41 @@ OpenVINO Runtime uses frontend libraries dynamically to read models in different - ``openvino_paddle_frontend`` is used to read the Paddle file format. - ``openvino_pytorch_frontend`` is used to convert PyTorch model via ``openvino.convert_model`` API. -Depending on the model format types that are used in the application in ov::Core::read_model, select the appropriate libraries. +Depending on the model format types that are used in the application in ``ov::Core::read_model``, select the appropriate libraries. .. note:: - To optimize the size of the final distribution package, it is recommended to convert models to OpenVINO IR by using :doc:`model conversion API <../model-preparation>`. This way you do not have to keep TensorFlow, TensorFlow Lite, ONNX, PaddlePaddle, and other frontend libraries in the distribution package. + To optimize the size of the final distribution package, it is recommended to convert models + to OpenVINO IR by using :doc:`model conversion API <../model-preparation>`. This way you + do not have to keep TensorFlow, TensorFlow Lite, ONNX, PaddlePaddle, and other frontend + libraries in the distribution package. Examples #################### -**CPU + OpenVINO IR in C application** - -In this example, the application is written in C, performs inference on CPU, and reads models stored in the OpenVINO IR format. - -The following libraries are used: ``openvino_c``, ``openvino``, ``openvino_intel_cpu_plugin``, and ``openvino_ir_frontend``. - -- The ``openvino_c`` library is a main dependency of the application. The app links against this library. -- The ``openvino`` library is used as a private dependency for ``openvino_c`` and is also used in the deployment. -- ``openvino_intel_cpu_plugin`` is used for inference. -- ``openvino_ir_frontend`` is used to read source models. - -**MULTI execution on GPU and CPU in `tput` mode** +.. dropdown:: CPU + OpenVINO IR in C application -In this example, the application is written in C++, performs inference :doc:`simultaneously on GPU and CPU devices <../running-inference/inference-devices-and-modes/multi-device>` with the ov::hint::PerformanceMode::THROUGHPUT property set, and reads models stored in the ONNX format. + In this example, the application is written in C, performs inference on CPU, and reads models stored in the OpenVINO IR format. -The following libraries are used: ``openvino``, ``openvino_intel_gpu_plugin``, ``openvino_intel_cpu_plugin``, ``openvino_auto_plugin``, ``openvino_auto_batch_plugin``, and ``openvino_onnx_frontend``. + The following libraries are used: ``openvino_c``, ``openvino``, ``openvino_intel_cpu_plugin``, and ``openvino_ir_frontend``. -- The ``openvino`` library is a main dependency of the application. The app links against this library. -- ``openvino_intel_gpu_plugin`` and ``openvino_intel_cpu_plugin`` are used for inference. -- ``openvino_auto_plugin`` is used for Multi-Device Execution. -- ``openvino_auto_batch_plugin`` can be also put in the distribution to improve the saturation of :doc:`Intel® GPU <../running-inference/inference-devices-and-modes/gpu-device>` device. If there is no such plugin, :doc:`Automatic Batching <../running-inference/inference-devices-and-modes/automatic-batching>` is turned off. -- ``openvino_onnx_frontend`` is used to read source models. + - The ``openvino_c`` library is a main dependency of the application. The app links against this library. 
+ - The ``openvino`` library is used as a private dependency for ``openvino_c`` and is also used in the deployment. + - ``openvino_intel_cpu_plugin`` is used for inference. + - ``openvino_ir_frontend`` is used to read source models. -**Auto-Device Selection between GPU and CPU** +.. dropdown:: Auto-Device Selection between GPU and CPU -In this example, the application is written in C++, performs inference with the :doc:`Automatic Device Selection <../running-inference/inference-devices-and-modes/auto-device-selection>` mode, limiting device list to GPU and CPU, and reads models :doc:`created using C++ code <../running-inference/integrate-openvino-with-your-application/model-representation>`. + In this example, the application is written in C++, performs inference + with the :doc:`Automatic Device Selection <../running-inference/inference-devices-and-modes/auto-device-selection>` + mode, limiting device list to GPU and CPU, and reads models + :doc:`created using C++ code <../running-inference/integrate-openvino-with-your-application/model-representation>`. -The following libraries are used: ``openvino``, ``openvino_auto_plugin``, ``openvino_intel_gpu_plugin``, and ``openvino_intel_cpu_plugin``. + The following libraries are used: ``openvino``, ``openvino_auto_plugin``, ``openvino_intel_gpu_plugin``, and ``openvino_intel_cpu_plugin``. -- The ``openvino`` library is a main dependency of the application. The app links against this library. -- ``openvino_auto_plugin`` is used to enable Automatic Device Selection. -- ``openvino_intel_gpu_plugin`` and ``openvino_intel_cpu_plugin`` are used for inference. AUTO selects between CPU and GPU devices according to their physical existence on the deployed machine. -- No frontend library is needed because ``ov::Model`` is created in code. + - The ``openvino`` library is a main dependency of the application. The app links against this library. + - ``openvino_auto_plugin`` is used to enable Automatic Device Selection. + - ``openvino_intel_gpu_plugin`` and ``openvino_intel_cpu_plugin`` are used for inference. AUTO + selects between CPU and GPU devices according to their physical existence on the deployed machine. + - No frontend library is needed because ``ov::Model`` is created in code. diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst index ae4d03877f18ac..7a8a11a24bf56a 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst @@ -228,8 +228,7 @@ To restore the model from checkpoint you should use the following API: :language: python :fragment: [load_checkpoint] -For more details on saving/loading checkpoints in the NNCF, see the following -`documentation `__. +For more details, see the following `documentation `__. 
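The exact NNCF restore call is shown in the snippet referenced above. Purely as a hedged, framework-level sketch of the surrounding save/resume flow (plain PyTorch, not NNCF-specific API; the file name, dictionary keys, and stand-in model are hypothetical):

.. code-block:: python

   import torch
   from torch import nn

   # Stand-in model; in the guide this would be the compressed model.
   model = nn.Sequential(nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 2))

   # Save a checkpoint during training...
   torch.save({"model_state_dict": model.state_dict(), "epoch": 10}, "checkpoint.pth")

   # ...and load it later to resume training. NNCF-specific compression state
   # is restored through the NNCF API from the referenced documentation.
   checkpoint = torch.load("checkpoint.pth", map_location="cpu")
   model.load_state_dict(checkpoint["model_state_dict"])
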
Deploying pruned model ###################### diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/quantization-aware-training-tensorflow.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/quantization-aware-training-tensorflow.rst index 41a2ea615214a8..57abad31a42354 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/quantization-aware-training-tensorflow.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/quantization-aware-training-tensorflow.rst @@ -91,7 +91,8 @@ To restore the model from checkpoint, use the following API: :fragment: [load_checkpoint] -For more details on saving/loading checkpoints in the NNCF, see the corresponding `NNCF documentation `__. +For more details on saving/loading checkpoints in the NNCF, see the corresponding +`NNCF documentation `__. Deploying quantized model ######################### diff --git a/docs/articles_en/openvino-workflow/model-optimization.rst b/docs/articles_en/openvino-workflow/model-optimization.rst index 6b56ee121b47ae..17903c760e1c4c 100644 --- a/docs/articles_en/openvino-workflow/model-optimization.rst +++ b/docs/articles_en/openvino-workflow/model-optimization.rst @@ -65,5 +65,5 @@ Additional Resources - :doc:`Training-time Optimization ` - :doc:`Weight Compression ` - :doc:`Deployment optimization ` -- `HuggingFace Optimum Intel `__ +- `Hugging Face Optimum Intel `__ diff --git a/docs/articles_en/openvino-workflow/model-preparation.rst b/docs/articles_en/openvino-workflow/model-preparation.rst index f4b4b6787590ad..c6c7eaeb17fb31 100644 --- a/docs/articles_en/openvino-workflow/model-preparation.rst +++ b/docs/articles_en/openvino-workflow/model-preparation.rst @@ -1,5 +1,3 @@ -.. {#openvino_docs_model_processing_introduction} - Model Preparation ================= @@ -66,15 +64,15 @@ The easiest way to obtain a model is to download it from an online database, suc For PyTorch models, `Python API <#convert-a-model-with-python-convert-model>`__ is the only conversion option. -Model States +Different model representations ############################################## -There are three states a model in OpenVINO can be: saved on disk, loaded but not compiled -(``ov.Model``) or loaded and compiled (``ov.CompiledModel``). +A model in OpenVINO can be represented in three ways: saved on disk, loaded but not compiled +(``ov.Model``), and loaded and compiled (``ov.CompiledModel``). | **Saved on disk** -| A model in this state consists of one or more files that fully represent the neural - network. A model can be stored in different ways. For example: +| One or more files saved on a drive, fully representing the neural network. + Different model formats are stored in different ways, for example: | OpenVINO IR: pair of .xml and .bin files | ONNX: .onnx file | TensorFlow: directory with a .pb file and two subfolders or just a .pb file @@ -88,7 +86,7 @@ There are three states a model in OpenVINO can be: saved on disk, loaded but not applying quantization or even adding preprocessing steps before compiling the model. | **Loaded and compiled** -| This state is achieved when one or more devices are specified for a model object to +| This representation is achieved when one or more devices are specified for a model object to run on (``ov.CompiledModel``), allowing device optimizations to be made and enabling inference. 
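A minimal sketch of moving between these representations (the file names are placeholders):

.. code-block:: python

   import openvino as ov

   core = ov.Core()

   # Saved on disk -> loaded but not compiled (ov.Model)
   model = core.read_model("model.xml")

   # Loaded but not compiled -> loaded and compiled (ov.CompiledModel)
   compiled_model = core.compile_model(model, "CPU")

   # An ov.Model can also be written back to disk as OpenVINO IR
   ov.save_model(model, "model_copy.xml")
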
diff --git a/docs/articles_en/openvino-workflow/model-preparation/convert-model-tensorflow.rst b/docs/articles_en/openvino-workflow/model-preparation/convert-model-tensorflow.rst index 1db451045e702d..e66a5a87c48cd3 100644 --- a/docs/articles_en/openvino-workflow/model-preparation/convert-model-tensorflow.rst +++ b/docs/articles_en/openvino-workflow/model-preparation/convert-model-tensorflow.rst @@ -163,7 +163,7 @@ There are three ways to store non-frozen TensorFlow models. 2. **Checkpoint**. In this case, a model consists of two files: ``inference_graph.pb`` (or ``inference_graph.pbtxt``) and ``checkpoint_file.ckpt``. If you do not have an inference graph file, refer to the - `Freezing Custom Models in Python <#Freezing-Custom-Models-in-Python>`__ section. + `Freezing Custom Models in Python <#freezing-custom-models-in-python>`__ section. To convert the model with the inference graph in ``.pb`` format, provide paths to both files as an argument for ``ovc`` or ``openvino.convert_model``: diff --git a/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst b/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst index be67f581173309..171422f932ea5b 100644 --- a/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst +++ b/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst @@ -1,6 +1,3 @@ -.. {#openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_IR} - - Convert to OpenVINO IR ============================================= @@ -18,14 +15,9 @@ Convert to OpenVINO IR Convert from PaddlePaddle -:doc:`IR (Intermediate Representation) <../../documentation/openvino-ir-format>` is -OpenVINO own format consisting of ``.xml`` and ``.bin`` files. -Convert the model into OpenVINO IR for `better performance <#ir-conversion-benefits>`__. - -Convert Models -############################################## -Here are code examples of how to use these methods with different model formats: +:doc:`OpenVINO IR <../../documentation/openvino-ir-format>` is the proprietary model format +used by OpenVINO, typically obtained by converting models of supported frameworks: .. tab-set:: @@ -75,7 +67,11 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + When you use the ``convert_model()`` method, you have more control and you can + specify additional adjustments for ``ov.Model``. The ``read_model()`` and + ``compile_model()`` methods are easier to use, however, they do not have such + capabilities. With ``ov.Model`` you can choose to optimize, compile and run + inference on it or serialize it into a file for subsequent use. .. dropdown:: List of supported formats: @@ -175,7 +171,8 @@ Here are code examples of how to use these methods with different model formats: .. tab-item:: CLI :sync: cli - You can use ``ovc`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + You can use ``ovc`` command-line tool to convert a model to IR. The obtained IR can + then be read by ``read_model()`` and inferred. .. 
code-block:: sh @@ -194,7 +191,11 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + When you use the ``convert_model()`` method, you have more control and you can + specify additional adjustments for ``ov.Model``. The ``read_model()`` and + ``compile_model()`` methods are easier to use, however, they do not have such + capabilities. With ``ov.Model`` you can choose to optimize, compile and run + inference on it or serialize it into a file for subsequent use. .. dropdown:: List of supported formats: @@ -294,7 +295,8 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can + then be read by ``read_model()`` and inferred. .. dropdown:: List of supported formats: @@ -319,7 +321,11 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + When you use the ``convert_model()`` method, you have more control and you can + specify additional adjustments for ``ov.Model``. The ``read_model()`` and + ``compile_model()`` methods are easier to use, however, they do not have such + capabilities. With ``ov.Model`` you can choose to optimize, compile and run + inference on it or serialize it into a file for subsequent use. .. dropdown:: List of supported formats: @@ -416,7 +422,8 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR + can then be read by ``read_model()`` and inferred. .. dropdown:: List of supported formats: @@ -441,7 +448,11 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + When you use the ``convert_model()`` method, you have more control and you can + specify additional adjustments for ``ov.Model``. The ``read_model()`` and + ``compile_model()`` methods are easier to use, however, they do not have such + capabilities. 
With ``ov.Model`` you can choose to optimize, compile and run + inference on it or serialize it into a file for subsequent use. .. dropdown:: List of supported formats: @@ -545,7 +556,8 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR + can then be read by ``read_model()`` and inferred. .. dropdown:: List of supported formats: @@ -561,32 +573,38 @@ Here are code examples of how to use these methods with different model formats: :doc:`article `. -* :doc:`How to convert PyTorch ` -* :doc:`How to convert ONNX ` -* :doc:`How to convert TensorFlow ` -* :doc:`How to convert TensorFlow Lite ` -* :doc:`How to convert PaddlePaddle ` -To choose the best workflow for your application, read the :doc:`Model Preparation section <../model-preparation>`. +These are basic examples, for detailed conversion instructions, see the individual guides on +:doc:`PyTorch `, :doc:`ONNX `, +:doc:`TensorFlow `, :doc:`TensorFlow Lite `, +and :doc:`PaddlePaddle `. Refer to the list of all supported conversion options in :doc:`Conversion Parameters `. IR Conversion Benefits ################################################ - | **Saving to IR to improve first inference latency** -| When first inference latency matters, rather than convert the framework model each time it is loaded, which may take some time depending on its size, it is better to do it once. Save the model as an OpenVINO IR with ``save_model`` and then load it with ``read_model`` as needed. This should improve the time it takes the model to make the first inference as it avoids the conversion step. +| When first inference latency matters, rather than convert the framework model each time it + is loaded, which may take some time depending on its size, it is better to do it once. Save + the model as an OpenVINO IR with ``save_model`` and then load it with ``read_model`` as + needed. This should improve the time it takes the model to make the first inference as it + avoids the conversion step. | **Saving to IR in FP16 to save space** -| Save storage space, even more so if FP16 is used as it may cut the size by about 50%, especially useful for large models, like Llama2-7B. +| Save storage space, even more so if FP16 is used as it may cut the size by about 50%, + especially useful for large models, like Llama2-7B. | **Saving to IR to avoid large dependencies in inference code** -| Frameworks such as TensorFlow and PyTorch tend to be large dependencies (multiple gigabytes), and not all inference environments have enough space to hold them. -| Converting models to OpenVINO IR allows them to be used in an environment where OpenVINO is the only dependency, so much less disk space is needed. -| Loading and compiling with OpenVINO directly usually takes less runtime memory than loading the model in the source framework and then converting and compiling it. - -An example showing how to take advantage of OpenVINO IR, saving a model in OpenVINO IR once, using it many times, is shown below: +| Frameworks such as TensorFlow and PyTorch tend to be large dependencies for applications + running inference (multiple gigabytes). Converting models to OpenVINO IR removes this + dependency, as OpenVINO can run its inference with no additional components. 
+ This way, much less disk space is needed, while loading and compiling usually takes less + runtime memory than loading the model in the source framework and then converting + and compiling it. + +Here is an example of how to benefit from OpenVINO IR, saving a model once and running it +multiple times: .. code-block:: py diff --git a/docs/articles_en/openvino-workflow/running-inference.rst b/docs/articles_en/openvino-workflow/running-inference.rst index 3ccd9f3ff7cc2e..95c6bc66e902b8 100644 --- a/docs/articles_en/openvino-workflow/running-inference.rst +++ b/docs/articles_en/openvino-workflow/running-inference.rst @@ -39,9 +39,9 @@ OpenVINO IR provides by far the best first-inference latency scores. For more detailed information on how to convert, read, and compile supported model formats see the :doc:`Model Preparation article `. - Note that TensorFlow models can be run using the + Note that PyTorch models can be run using the :doc:`torch.compile feature `, as well as the standard ways of - :doc:`converting TensorFlow ` + :doc:`converting Pytorch ` or running its inference. OpenVINO Runtime uses a plugin architecture. Its plugins are software components that contain complete implementation for inference on a particular Intel® hardware device: CPU, GPU, etc. Each plugin implements the unified API and provides additional hardware-specific APIs for configuring devices or API interoperability between OpenVINO Runtime and underlying plugin backend. diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst index 19328ec7a9c1f9..9e41f8b01fe729 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst @@ -12,7 +12,6 @@ Inference Devices and Modes inference-devices-and-modes/gpu-device inference-devices-and-modes/npu-device inference-devices-and-modes/auto-device-selection - inference-devices-and-modes/multi-device inference-devices-and-modes/hetero-execution inference-devices-and-modes/automatic-batching inference-devices-and-modes/query-device-properties @@ -32,11 +31,11 @@ different conditions: | Assume certain level of automation in selecting devices for inference. They may potentially increase your deployed solution's performance and portability. The automated modes are: | :doc:`Automatic Device Selection (AUTO) ` -| :doc:`Multi-Device Execution (MULTI) ` | :doc:`Heterogeneous Execution (HETERO) ` | :doc:`Automatic Batching Execution (Auto-batching) ` +| :doc:`[DEPRECATED] Multi-Device Execution (MULTI) <../../documentation/legacy-features/multi-device>` - +To learn how to change the device configuration, read the :doc:`Query device properties article `. 
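For instance, a minimal sketch that lists the devices visible on the current machine and then lets AUTO pick one of them (the model path is a placeholder):

.. code-block:: python

   import openvino as ov

   core = ov.Core()

   # e.g. ['CPU', 'GPU'], depending on the machine
   print(core.available_devices)

   model = core.read_model("model.xml")

   # AUTO selects the most suitable device; an explicit name such as "CPU" also works
   compiled_model = core.compile_model(model, "AUTO")
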
Enumerating Available Devices ####################################### @@ -83,4 +82,10 @@ Accordingly, the code that loops over all available devices of the "GPU" type on :language: cpp :fragment: [part3] +Additional Resources +#################### +* `OpenVINO™ Runtime API Tutorial <./../../notebooks/openvino-api-with-output.html>`__ +* `AUTO Device Tutorial <./../../notebooks/auto-device-with-output.html>`__ +* `GPU Device Tutorial <./../../notebooks/gpu-device-with-output.html>`__ +* `NPU Device Tutorial <./../../notebooks/hello-npu-with-output.html>`__ \ No newline at end of file diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection.rst index 9a860d8411005e..1de39e9b91412b 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection.rst @@ -95,6 +95,10 @@ model and perform the first inference) is reduced when using AUTO. For example: The longer the process runs, the closer realtime performance will be to that of the best-suited device. +.. note:: + + **Testing accuracy with the AUTO device is not recommended.** Since the CPU and GPU (or other target devices) may produce slightly different accuracy numbers, using AUTO could lead to inconsistent accuracy results from run to run due to a different number of inferences on CPU and GPU. This is particularly true when testing with a small number of inputs. To achieve consistent inference on the GPU (or another target device), you can disable CPU acceleration by setting ``ov::intel_auto::enable_startup_fallback`` to false. + Using AUTO ########## @@ -186,7 +190,7 @@ the following setup options: | | ``DEVICE_PRIORITY`` | | | | | | Specify the schedule policy of infer request assigned to hardware | -| | plugin for AUTO cumulative mode (MULTI). | +| | plugin for AUTO cumulative mode. | | | | | | The default value is ``DEVICE_PRIORITY``. | +----------------------------------------------+--------------------------------------------------------------------+ @@ -325,24 +329,24 @@ This option prioritizes low latency, providing short response time for each infe If no performance hint is set explicitly, AUTO will set LATENCY for devices that have not set ``ov::device::properties``, for example, ``ov::device::properties(, ov::hint::performance_mode(ov::hint::LATENCY))``. - .. _cumulative throughput: - -THROUGHPUT +``THROUGHPUT`` -------------------- This option prioritizes high throughput, balancing between latency and power. It is best suited for tasks involving multiple jobs, such as inference of video feeds or large numbers of images. +``CUMULATIVE_THROUGHPUT`` +--------------------------------- -CUMULATIVE_THROUGHPUT ---------------------- - -While ``LATENCY`` and ``THROUGHPUT`` can select one target device with your preferred performance option, the ``CUMULATIVE_THROUGHPUT`` option enables running inference on multiple devices for higher throughput. With ``CUMULATIVE_THROUGHPUT``, AUTO loads the network model to all available devices in the candidate list, and then runs inference on them based on the default or specified priority. - -CUMULATIVE_THROUGHPUT has similar behavior as :doc:`the Multi-Device execution mode (MULTI) `. 
The only difference is that CUMULATIVE_THROUGHPUT uses the devices specified by AUTO, which means that it's not mandatory to add devices manually, while with MULTI, you need to specify the devices before inference. +While ``LATENCY`` and ``THROUGHPUT`` can select one target device with your preferred performance option, +the ``CUMULATIVE_THROUGHPUT`` option enables running inference on multiple devices for higher throughput. +With ``CUMULATIVE_THROUGHPUT``, AUTO loads the network model to all available devices (specified by AUTO) +in the candidate list, and then runs inference on them based on the default or specified priority. -If device priority is specified when using CUMULATIVE_THROUGHPUT, AUTO will run inference requests on devices based on the priority. In the following example, AUTO will always try to use GPU first, and then use CPU if GPU is busy: +If device priority is specified when using ``CUMULATIVE_THROUGHPUT``, AUTO will run inference +requests on devices based on the priority. In the following example, AUTO will always +try to use GPU first, and then use CPU if GPU is busy: .. tab-set:: @@ -361,7 +365,7 @@ If device priority is specified when using CUMULATIVE_THROUGHPUT, AUTO will run ov::CompiledModel compiled_model = core.compile_model(model, "AUTO:GPU,CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)); -If AUTO is used without specifying any device names, and if there are multiple GPUs in the system, CUMULATIVE_THROUGHPUT mode will use all of the GPUs by default. If the system has more than two GPU devices, AUTO will remove CPU from the device candidate list to keep the GPUs running at full capacity. A full list of system devices and their unique identifiers can be queried using ov::Core::get_available_devices (for more information, see :doc:`Query Device Properties `). To explicitly specify which GPUs to use, set their priority when compiling with AUTO: +If AUTO is used without specifying any device names, and if there are multiple GPUs in the system, ``CUMULATIVE_THROUGHPUT`` mode will use all of the GPUs by default. If the system has more than two GPU devices, AUTO will remove CPU from the device candidate list to keep the GPUs running at full capacity. A full list of system devices and their unique identifiers can be queried using ov::Core::get_available_devices (for more information, see :doc:`Query Device Properties `). To explicitly specify which GPUs to use, set their priority when compiling with AUTO: .. 
tab-set:: @@ -509,8 +513,9 @@ For more information, refer to the :doc:`Benchmark Tool <../../../learn-openvino Additional Resources #################### -- :doc:`Debugging AUTO ` -- :doc:`Running on Multiple Devices Simultaneously ` -- :doc:`Inference Devices and Modes <../inference-devices-and-modes>` +* `Automatic Device Selection with OpenVINO™ Notebook `__ +* :doc:`Debugging AUTO ` +* :doc:`(LEGACY) Running on Multiple Devices Simultaneously <../../../documentation/legacy-features/multi-device>` +* :doc:`Inference Devices and Modes <../inference-devices-and-modes>` diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst index e2f87fed9d3c60..8b16e3971a2519 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst @@ -209,11 +209,11 @@ For more details and code examples, see the :doc:`Precision Control <../optimize Supported Features ########################################################### -Multi-device Execution +Automatic Device Selection +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ If a system includes OpenVINO-supported devices other than the CPU (e.g. an integrated GPU), then any supported model can be executed on all the devices simultaneously. -This can be achieved by specifying ``MULTI:CPU,GPU.0`` as a target device in case of simultaneous usage of CPU and GPU. +This can be achieved by specifying ``AUTO:CPU,GPU.0`` as a target device, and adding the ``CUMULATIVE_THROUGHPUT`` parameter. .. tab-set:: @@ -222,17 +222,17 @@ This can be achieved by specifying ``MULTI:CPU,GPU.0`` as a target device in cas .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_cpu.py :language: py - :fragment: [compile_model_multi] + :fragment: [compile_model_auto] .. tab-item:: C++ :sync: cpp .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_cpu.cpp :language: cpp - :fragment: [compile_model_multi] + :fragment: [compile_model_auto] -For more details, see the :doc:`Multi-device execution ` article. +For more details, see the :doc:`Automatic Device Selection `. .. _multi_stream_execution: diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst index b1449a4545b95f..dc158fe9352042 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst @@ -7,8 +7,8 @@ Performance Hints and Thread Scheduling on performance hints automatically. To simplify the configuration of hardware devices, it is recommended to use the -:doc:` ov::hint::PerformanceMode::LATENCY and ov::hint::PerformanceMode::THROUGHPUT <../../../../optimize-inference/high-level-performance-hints>` -high-level performance hints. Both performance hints ensure optimal portability +ov::hint::PerformanceMode::LATENCY and ov::hint::PerformanceMode::THROUGHPUT +:doc:`high-level performance hints <../../optimize-inference/high-level-performance-hints>`. 
Both performance hints ensure optimal portability and scalability of applications across various platforms and models. - ``ov::inference_num_threads`` limits the number of logical processors used for CPU inference. diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst index 0e3506484b42ae..1b9c5b89eff8bc 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst @@ -141,11 +141,11 @@ Supported Features The GPU plugin supports the following features: -Multi-device Execution +Automatic Device Selection +++++++++++++++++++++++++++++++++++++++ If a system has multiple GPUs (for example, an integrated and a discrete Intel GPU), then any supported model can be executed on all GPUs simultaneously. -It is done by specifying ``MULTI:GPU.1,GPU.0`` as a target device. +It is done by specifying ``AUTO:GPU.1,GPU.0`` as a target device, and adding the ``CUMULATIVE_THROUGHPUT`` parameter. .. tab-set:: @@ -154,17 +154,17 @@ It is done by specifying ``MULTI:GPU.1,GPU.0`` as a target device. .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.py :language: Python - :fragment: compile_model_multi + :fragment: compile_model_auto .. tab-item:: C++ :sync: cpp .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_gpu.cpp :language: cpp - :fragment: compile_model_multi + :fragment: compile_model_auto -For more details, see the :doc:`Multi-device execution `. +For more details, see the :doc:`Automatic Device Selection `. Automatic Batching +++++++++++++++++++++++++++++++++++++++ @@ -476,6 +476,7 @@ Since OpenVINO relies on the OpenCL kernels for the GPU implementation, many gen Additional Resources ####################################### +* `Working with GPUs in OpenVINO™ Notebook `__ * :doc:`Inference Devices and Modes <../inference-devices-and-modes>`. * :doc:`Optimization guide <../optimize-inference>`. * `GPU plugin developer documentation `__ diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/hetero-execution.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/hetero-execution.rst index 24f820755555b0..a96ca304dac3e5 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/hetero-execution.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/hetero-execution.rst @@ -18,6 +18,8 @@ Execution via the heterogeneous mode can be divided into two independent steps: 1. Setting hardware affinity to operations (`ov::Core::query_model `__ is used internally by the Hetero device). 2. Compiling a model to the Heterogeneous device assumes splitting the model to parts, compiling them on the specified devices (via `ov::device::priorities `__), and executing them in the Heterogeneous mode. The model is split to subgraphs in accordance with the affinities, where a set of connected operations with the same affinity is to be a dedicated subgraph. Each subgraph is compiled on a dedicated device and multiple `ov::CompiledModel `__ objects are made, which are connected via automatically allocated intermediate tensors. 
+ + If you set pipeline parallelism (via ``ov::hint::model_distribution_policy``), the model is split into multiple stages, and each stage is assigned to a different device. The output of one stage is fed as input to the next stage. These two steps are not interconnected and affinities can be set in one of two ways, used separately or in combination (as described below): in the ``manual`` or the ``automatic`` mode. @@ -27,16 +29,21 @@ Defining and Configuring the Hetero Device Following the OpenVINO™ naming convention, the Hetero execution plugin is assigned the label of ``"HETERO".`` It may be defined with no additional parameters, resulting in defaults being used, or configured further with the following setup options: -+-------------------------------+--------------------------------------------+-----------------------------------------------------------+ -| Parameter Name & C++ property | Property values | Description | -+===============================+============================================+===========================================================+ -| | "MULTI_DEVICE_PRIORITIES" | | HETERO: | | Lists the devices available for selection. | -| | ``ov::device::priorities`` | | comma-separated, no spaces | | The device sequence will be taken as priority | -| | | | | | from high to low. | -+-------------------------------+--------------------------------------------+-----------------------------------------------------------+ - - -Manual and Automatic modes for assigning affinities ++--------------------------------------------+-------------------------------------------------------------+-----------------------------------------------------------+ +| Parameter Name & C++ property | Property values | Description | ++============================================+=============================================================+===========================================================+ +| | "MULTI_DEVICE_PRIORITIES" | | ``HETERO: `` | | Lists the devices available for selection. | +| | ``ov::device::priorities`` | | | | The device sequence will be taken as priority | +| | | | comma-separated, no spaces | | from high to low. | ++--------------------------------------------+-------------------------------------------------------------+-----------------------------------------------------------+ +| | | | ``empty`` | | Model distribution policy for inference with | +| | "MODEL_DISTRIBUTION_POLICY" | | ``ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL`` | | multiple devices. Distributes the model to multiple | +| | | | | | devices during model compilation. | +| | ``ov::hint::model_distribution_policy`` | | HETERO only supports PIPELINE_PARALLEL, The default value | | | +| | | | is empty | | | ++--------------------------------------------+-------------------------------------------------------------+-----------------------------------------------------------+ + +Manual and Automatic Modes for Assigning Affinities ################################################### The Manual Mode @@ -65,8 +72,11 @@ Randomly selecting operations and setting affinities may lead to decrease in mod :fragment: [set_manual_affinities] -The Automatic Mode -+++++++++++++++++++++ +Automatic Mode +++++++++++++++++++ + +Without Pipeline Parallelism +----------------------------- It decides automatically which operation is assigned to which device according to the support from dedicated devices (``GPU``, ``CPU``, etc.) and query model step is called implicitly by Hetero device during model compilation. 
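A minimal sketch of the automatic mode, assuming a GPU and a CPU are available on the machine (the model path is a placeholder):

.. code-block:: python

   import openvino as ov

   core = ov.Core()
   model = core.read_model("model.xml")

   # Operations supported by the GPU are assigned to it;
   # the remaining operations fall back to the CPU.
   compiled_model = core.compile_model(model, "HETERO:GPU,CPU")
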
@@ -90,6 +100,34 @@ It does not take into account device peculiarities such as the inability to infe :language: cpp :fragment: [compile_model] +Pipeline Parallelism (Preview) +-------------------------------- + +Pipeline parallelism is set via ``ov::hint::model_distribution_policy``. This mode is an efficient technique for inferring large models on multiple devices. The model is divided into multiple stages, with each stage assigned to a different device (``dGPU``, ``iGPU``, ``CPU``, etc.) in the sequence of device priority. This mode estimates memory size required by operations (includes weights memory and runtime memory), assigns operations (stage) to each device per the available memory size and considering the minimal data transfer between devices. Different stages are executed in sequence of model flow. + +.. note:: + + Since iGPU and CPU share the host memory and host resource should be always considered as a fallback, it is recommended to use at most one of the iGPU or CPU and put it at the end of device list. + + For large models that do not fit on a single first-priority device, model pipeline parallelism is employed. This technique distributes certain parts of the model across different devices, ensuring that each device has enough memory to infer the operations. + + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_hetero.py + :language: Python + :fragment: [set_pipeline_parallelism] + + .. tab-item:: C++ + :sync: cpp + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_hetero.cpp + :language: cpp + :fragment: [set_pipeline_parallelism] + Using Manual and Automatic Modes in Combination +++++++++++++++++++++++++++++++++++++++++++++++ diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst index ea39001b4f3fd4..4c262b49f6f704 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst @@ -20,8 +20,9 @@ NPU Plugin is now available through all relevant OpenVINO distribution channels. NPU Plugin needs an NPU Driver to be installed on the system for both compiling and executing a model. Follow the instructions below to install the latest NPU drivers: -* Windows driver: https://www.intel.com/content/www/us/en/download/794734/intel-npu-driver-windows.html -* Linux driver: https://github.com/intel/linux-npu-driver/releases + +* `Windows driver `__ +* `Linux driver `__ The plugin uses the graph extension API exposed by the driver to convert the OpenVINO specific representation @@ -29,6 +30,25 @@ of the model into a proprietary format. The compiler included in the user mode d platform specific optimizations in order to efficiently schedule the execution of network layers and memory transactions on various NPU hardware submodules. +To use NPU for inference, pass the device name to the ``ov::Core::compile_model()`` method: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/compile_model_npu.py + :language: py + :fragment: [compile_model_default_npu] + + .. tab-item:: C++ + :sync: cpp + + .. 
doxygensnippet:: docs/articles_en/assets/snippets/compile_model_npu.cpp + :language: cpp + :fragment: [compile_model_default_npu] + + Model Caching ############################# @@ -85,7 +105,7 @@ For more details about OpenVINO model caching, see the Supported Features and properties ####################################### -The NPU device is currently supported by AUTO and MULTI inference modes +The NPU device is currently supported by AUTO inference modes (HETERO execution is partially supported, for certain models). The NPU support in OpenVINO is still under active development and may @@ -106,9 +126,12 @@ offer a limited set of supported OpenVINO features. ov::hint::model_priority ov::hint::num_requests ov::hint::performance_mode + ov::hint::execution_mode ov::cache_dir ov::compilation_num_threads ov::enable_profiling + ov::workload_type + ov::intel_npu::compilation_mode_params .. tab-item:: Read-only properties @@ -120,10 +143,14 @@ offer a limited set of supported OpenVINO features. ov::range_for_async_infer_requests ov::range_for_streams ov::num_streams + ov::execution_devices ov::device::architecture ov::device::capabilities ov::device::full_name ov::device::uuid + ov::device::pci_info + ov::device::gops + ov::device::type ov::intel_npu::device_alloc_mem_size ov::intel_npu::device_total_mem_size ov::intel_npu::driver_version @@ -135,6 +162,60 @@ offer a limited set of supported OpenVINO features. based on the performance mode is **4 for THROUGHPUT** and **1 for LATENCY**. The default mode for the NPU device is LATENCY. +**ov::intel_npu::compilation_mode_params** + +``ov::intel_npu::compilation_mode_params`` is an NPU-specific property that allows to +control model compilation for NPU. + +.. note:: + + The functionality is in experimental stage currently, can be a subject for + deprecation and may be replaced with generic OV API in future OV releases. + +Following configuration options are supported: + +**optimization-level** + +Defines a preset of optimization passes to be applied during compilation. + +.. list-table:: + :widths: 10 200 + :header-rows: 1 + + * - **Value** + - **Description** + * - 0 + - Reduced subset of optimization passes. Smaller compile time. + * - 1 + - **Default.** Balanced performance/compile time. + * - 2 + - Prioritize performance over compile time that may be an issue. + +**performance-hint-override** + +An extension for LATENCY mode being specified using ``ov::hint::performance_mode`` +Has no effect for other ``ov::hint::PerformanceMode`` hints. + +.. list-table:: + :widths: 10 200 + :header-rows: 1 + + * - **Value** + - **Description** + * - efficiency + - **Default.** Balanced performance and power consumption. + * - latency + - Prioritize performance over power efficiency. + +.. tab-set:: + + .. tab-item:: Usage example + + .. code-block:: + + map config = {ov::intel_npu::compilation_mode_params.name(), ov::Any("optimization-level=1 performance-hint-override=latency")}; + + compile_model(model, config); Limitations ############################# @@ -151,7 +232,8 @@ guaranteed. 
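Tying the NPU properties above together, a hedged Python sketch of compiling a model for the NPU with a performance hint is shown below. The model path is a placeholder; the ``PERFORMANCE_HINT`` property is standard, while the ``NPU_COMPILATION_MODE_PARAMS`` key is an assumption mirroring ``ov::intel_npu::compilation_mode_params``.

.. code-block:: python

   import openvino as ov

   core = ov.Core()
   model = core.read_model("model.xml")

   config = {
       "PERFORMANCE_HINT": "LATENCY",
       # Assumed string name of ov::intel_npu::compilation_mode_params:
       "NPU_COMPILATION_MODE_PARAMS": "optimization-level=1 performance-hint-override=latency",
   }
   compiled_model = core.compile_model(model, "NPU", config)
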
Additional Resources ############################# -* `Vision colorization Notebook `__ +* `Working with NPUs in OpenVINO™ Notebook `__ +* `Vision colorization Notebook <./../../../notebooks/vision-image-colorization-with-output.html>`__ * `Classification Benchmark C++ Demo `__ * `3D Human Pose Estimation Python Demo `__ * `Object Detection C++ Demo `__ diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/query-device-properties.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/query-device-properties.rst index 6cc95a996bc629..5b3fd5c1dd7b51 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/query-device-properties.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/query-device-properties.rst @@ -34,8 +34,7 @@ of ``ov::available_devices``, the string name of ``AVAILABLE_DEVICES`` and the t Refer to the :doc:`Hello Query Device C++ Sample <../../../learn-openvino/openvino-samples/hello-query-device>` -sources and the :doc:`Multi-Device execution ` documentation for examples of -using the setting and getting properties in user applications. +sources for an example of using the setting and getting properties in user applications. Get a Set of Available Devices @@ -259,28 +258,5 @@ Or the number of threads that would be used for inference on ``CPU`` device: :fragment: [inference_num_threads] -Setting Properties for Compiled Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -The only mode that supports this method is `Multi-Device execution <./multi-device>`__: - - -.. tab-set:: - - .. tab-item:: Python - :sync: py - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_properties_api.py - :language: py - :fragment: [multi_device] - - .. tab-item:: C++ - :sync: cpp - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_properties_api.cpp - :language: cpp - :fragment: [multi_device] - - diff --git a/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application.rst b/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application.rst index 222c8760d0a880..4829d75cd07127 100644 --- a/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application.rst +++ b/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application.rst @@ -76,6 +76,7 @@ Step 2. Compile the Model ######################### ``ov::CompiledModel`` class represents a device specific compiled model. ``ov::CompiledModel`` allows you to get information inputs or output ports by a tensor name or index. This approach is aligned with the majority of frameworks. +:doc:`AUTO mode <./inference-devices-and-modes/auto-device-selection>` automatically selects the most suitable hardware for running inference. Compile the model for a specific device using ``ov::Core::compile_model()``: @@ -228,7 +229,8 @@ For more details please read article about :doc:`OpenVINO™ Model representatio The code above creates a compiled model associated with a single hardware device from the model object. It is possible to create as many compiled models as needed and use them simultaneously (up to the limitation of the hardware). -To learn how to change the device configuration, read the :doc:`Query device properties ` article. 
+To learn more about supported devices and inference modes, read the :doc:`Inference Devices and Modes <./inference-devices-and-modes>` article. + Step 3. Create an Inference Request ################################### @@ -432,6 +434,7 @@ To build your project using CMake with the default build tools currently availab Additional Resources #################### +* `OpenVINO™ Runtime API Tutorial <./../../notebooks/openvino-api-with-output.html>`__ * See the :doc:`OpenVINO Samples <../../learn-openvino/openvino-samples>` page for specific examples of how OpenVINO pipelines are implemented for applications like image classification, text prediction, and many others. * Models in the OpenVINO IR format on `Hugging Face `__. * :doc:`OpenVINO™ Runtime Preprocessing ` diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview.rst index 68e4abd528324d..10f6d7afdb0008 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview.rst @@ -21,7 +21,12 @@ Below is a list of cases where input/output layout is important: * Applying the :doc:`preprocessing <../optimize-preprocessing>` steps, such as subtracting means, dividing by scales, resizing an image, and converting ``RGB`` <-> ``BGR``. * Setting/getting a batch for a model. -* Doing the same operations as used during the model conversion phase. For more information, refer to the :doc:`Model Optimizer Embedding Preprocessing Computation <../../../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>` guide. +* Doing the same operations as used during the model conversion phase. For more information, refer to the: + + * :doc:`Convert to OpenVINO <../../../model-preparation/convert-model-to-ir>` + * `OpenVINO Model Conversion Tutorial `__ + * :doc:`[LEGACY] Model Optimizer Embedding Preprocessing Computation <../../../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>` guide. + * Improving the readability of a model input and output. Syntax of Layout diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput.rst index b23860e5bdd16d..044277a01ddc63 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput.rst @@ -68,7 +68,7 @@ In general, most throughput-oriented inference applications should: Multi-Device Execution ###################### -OpenVINO offers the automatic, scalable :doc:`multi-device inference mode <../inference-devices-and-modes/multi-device>`, which is a simple *application-transparent* way to improve throughput. There is no need to re-architecture existing applications for any explicit multi-device support: no explicit network loading to each device, no separate per-device queues, no additional logic to balance inference requests between devices, etc. 
For the application using it, multi-device is like any other device, as it manages all processes internally. +OpenVINO offers the automatic, scalable :doc:`multi-device inference mode <../../../documentation/legacy-features/multi-device>`, which is a simple *application-transparent* way to improve throughput. There is no need to re-architecture existing applications for any explicit multi-device support: no explicit network loading to each device, no separate per-device queues, no additional logic to balance inference requests between devices, etc. For the application using it, multi-device is like any other device, as it manages all processes internally. Just like with other throughput-oriented scenarios, there are several major pre-requisites for optimal multi-device performance: * Using the :ref:`Asynchronous API ` and :doc:`callbacks <../integrate-openvino-with-your-application/inference-request>` in particular. @@ -78,5 +78,5 @@ Keep in mind that the resulting performance is usually a fraction of the "ideal" .. note:: - While the legacy approach of optimizing the parameters of each device separately works, the :doc:`OpenVINO performance hints ` allow configuring all devices (that are part of the specific multi-device configuration) at once. + While the legacy approach of optimizing the parameters of each device separately works, the :doc:`Automatic Device Selection <../inference-devices-and-modes/auto-device-selection>` allow configuring all devices (that are part of the specific multi-device configuration) at once. diff --git a/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst b/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst index 67e70c9b999f0c..a350d1bcbb5a77 100644 --- a/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst +++ b/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst @@ -1,7 +1,7 @@ .. {#openvino_docs_OV_UG_ways_to_get_stateful_model} Obtaining a Stateful OpenVINO Model -==================================== +====================================== If the original framework does not offer a dedicated API for working with states, the resulting OpenVINO IR model will not be stateful by default. This means it will not contain @@ -23,7 +23,7 @@ and you have three ways to do it: .. _ov_ug_make_stateful: MakeStateful Transformation -########################### +############################### The MakeStateful transformation changes the structure of the model by replacing the user-defined pairs of Parameter and Results with the Assign and ReadValue operations: @@ -83,7 +83,7 @@ Parameter/Result tensor names. If there are no tensor names, .. _ov_ug_low_latency: LowLatency2 Transformation -########################## +############################### The LowLatency2 transformation changes the structure of a model containing :doc:`TensorIterator <../../../documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/tensor-iterator-1>` @@ -102,8 +102,7 @@ the current State API implementation. Input values are ignored, and the initial for the ReadValue operations are set to zeros unless the user specifies otherwise via :doc:`State API <../stateful-models>`. -Applying LowLatency2 Transformation -++++++++++++++++++++++++++++++++++++ +To apply LowLatency2 Transformation, follow the instruction below: 1. 
Get :doc:`ov::Model <../integrate-openvino-with-your-application/model-representation>`, for example: @@ -195,11 +194,11 @@ Applying LowLatency2 Transformation somewhere in the model. In such a case, trim non-reshapable layers via - :doc:`Model Optimizer command-line <../../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-setting-input-shapes>` - arguments: ``--input`` and ``--output``. + :doc:`Conversion Parameters <../../model-preparation/conversion-parameters>`: + ``--input`` and ``--output``. For example, check the `OpenVINO Model Conversion Tutorial `__. - For example, the parameter and the problematic constant in the picture above can be - trimmed using the ``--input Reshape_layer_name`` command-line option. The problematic + As for the parameter and the problematic constant in the picture above, it can be + trimmed by using the ``--input Reshape_layer_name`` command-line option. The problematic constant can be also replaced using OpenVINO, as shown in the following example: .. tab-set:: @@ -210,27 +209,7 @@ Applying LowLatency2 Transformation :language: cpp :fragment: [ov:replace_const] - - -Obtaining TensorIterator/Loop Operations using Model Optimizer -############################################################### - -**ONNX and frameworks supported via ONNX format:** *LSTM, RNN, GRU* original layers are -converted to the GRU/RNN/LSTM Sequence operations. *ONNX Loop* layer is converted to the -OpenVINO Loop operation. - -**TensorFlow:** *BlockLSTM* is converted to a TensorIterator operation. TensorIterator -body contains LSTM Cell operation. Modifications such as Peepholes and InputForget are -not supported. The *While* layer is converted to a TensorIterator. TensorIterator body -can contain any supported operations. However, dynamic cases where the count of iterations -cannot be calculated during shape inference (Model Optimizer conversion) are not supported. - -**TensorFlow2:** *While* layer is converted to a Loop operation. The Loop body can contain -any supported operations. - - - -Creating a Model via OpenVINO API +Stateful Model from Scratch ################################## The main approach to obtaining stateful OpenVINO IR models is converting from other @@ -251,3 +230,17 @@ a sink from `ov::Model` after deleting the node from the graph with the `delete_ :language: cpp :fragment: [ov:state_network] +.. note:: + + **ONNX and frameworks supported via ONNX format:** *LSTM, RNN, GRU* original layers are + converted to the GRU/RNN/LSTM Sequence operations. *ONNX Loop* layer is converted to the + OpenVINO Loop operation. + + **TensorFlow:** *BlockLSTM* is converted to a TensorIterator operation. The TensorIterator + body contains LSTM Cell operation. Modifications such as Peepholes and InputForget are + not supported. The *While* layer is converted to a TensorIterator. The TensorIterator body + can contain any supported operations. However, dynamic cases where the count of iterations + cannot be calculated during shape inference are not supported. + + **TensorFlow2:** *While* layer is converted to a Loop operation. The Loop body can contain + any supported operations. 
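To make the "from scratch" path described above concrete, here is a small, hedged Python sketch of a stateful accumulator built directly from opset operations; the variable name ``accumulator``, the input shape, and the CPU target are arbitrary choices for illustration, not part of the documented API surface.

.. code-block:: python

    import numpy as np
    import openvino as ov
    from openvino.runtime import opset6 as ops

    # out = data + state; the variable "accumulator" keeps the running sum.
    data = ops.parameter([1, 3], np.float32, name="data")
    init = ops.constant(np.zeros((1, 3), dtype=np.float32))
    state = ops.read_value(init, "accumulator")   # ReadValue bound to the variable
    summed = ops.add(data, state)
    update = ops.assign(summed, "accumulator")    # Assign writes the new value back

    # Results, sinks (the Assign node), parameters, model name.
    model = ov.Model([ops.result(summed)], [update], [data], "stateful_add")

    request = ov.Core().compile_model(model, "CPU").create_infer_request()
    request.infer({"data": np.ones((1, 3), dtype=np.float32)})
    print(request.query_state()[0].state.data)    # state after the first call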
diff --git a/docs/articles_en/openvino-workflow/torch-compile.rst b/docs/articles_en/openvino-workflow/torch-compile.rst index 280b7c01ca190e..759ca125d6dcd5 100644 --- a/docs/articles_en/openvino-workflow/torch-compile.rst +++ b/docs/articles_en/openvino-workflow/torch-compile.rst @@ -48,7 +48,7 @@ For OpenVINO installed via channels other than pip, such as conda, and versions .. image:: ../assets/images/torch_compile_backend_openvino.svg - :alt: torch.compile execution diagram + :alt: torch.compile execution diagram :width: 992px :height: 720px :scale: 60% @@ -196,7 +196,7 @@ The ``torch.compile`` feature is part of PyTorch 2.0, and is based on: (PEP 523) to dynamically modify Python bytecode right before it is executed (PyTorch operators that cannot be extracted to FX graph are executed in the native Python environment). It maintains the eager-mode capabilities using - `Guards `__ to ensure the + `Guards `__ to ensure the generated graphs are valid. * **AOTAutograd** - generates the backward graph corresponding to the forward graph captured by TorchDynamo. diff --git a/docs/dev/build_linux.md b/docs/dev/build_linux.md index 814deaa3deec3c..d3e1319f9d2c1a 100644 --- a/docs/dev/build_linux.md +++ b/docs/dev/build_linux.md @@ -12,13 +12,7 @@ The software was validated on: - [CMake](https://cmake.org/download/) 3.13 or higher - GCC 7.5 or higher to build OpenVINO Runtime - Python 3.8 - 3.11 for OpenVINO Runtime Python API -- (Optional) Install Intel® Graphics Compute Runtime for OpenCL™ Driver package to enable inference on Intel integrated GPUs. Select a driver package from the table below depending on what version of Ubuntu you are installing on. - - | Ubuntu | Driver package | - | --- | ----------- | - | 22.04 | [23.13.26032.30](https://github.com/intel/compute-runtime/releases/tag/23.13.26032.30) | - | 20.04 | [22.24.23453](https://github.com/intel/compute-runtime/releases/tag/22.24.23453) | - | 18.04 | [21.38.21026](https://github.com/intel/compute-runtime/releases/tag/21.38.21026) | +- (Optional) Install Intel® Graphics Compute Runtime for OpenCL™ Driver package to enable inference on Intel integrated GPUs. ## How to build @@ -45,6 +39,8 @@ The software was validated on: mkdir build && cd build ``` +> **NOTE**: It is recommended to disable the oneAPI environment before compiling OpenVINO from source on Linux, as it may cause build failures. + 4. OpenVINO Runtime uses a CMake-based build system. In the created `build` directory, run `cmake` to fetch project dependencies and create Unix makefiles, then run `make` to build the project: ```sh cmake -DCMAKE_BUILD_TYPE=Release .. diff --git a/docs/dev/build_windows.md b/docs/dev/build_windows.md index bc35d036579636..2515000e5ec4f0 100644 --- a/docs/dev/build_windows.md +++ b/docs/dev/build_windows.md @@ -6,20 +6,20 @@ Supported configurations: - Windows 10 x86 64-bit or higher with Visual Studio 2019 or higher build for X64 architecture. - Windows on ARM (shortly WoA) to build for ARM64 architecture. OpenVINO was validated on [Windows DevKit 2023](https://developer.qualcomm.com/hardware/windows-on-snapdragon/windows-dev-kit-2023) -## Software requirements +## Software requirements - [CMake](https://cmake.org/download/) 3.13 or higher - Microsoft Visual Studio 2019 or higher, version 16.3 or later - > **NOTE**: Native Microsoft Visual Studio for WoA is available since 2022. + > **NOTE**: Native Microsoft Visual Studio for WoA has been available since version 3.11. 
- Python 3.8 - 3.11 for OpenVINO Runtime Python API - > **NOTE**: Python for ARM64 is available since [3.11](https://www.python.org/downloads/windows/) version. + > **NOTE**: Python for ARM64 is available since [3.11](https://www.python.org/downloads/windows/) version. - [Git for Windows*] - (Windows on ARM only) [LLVM for Windows on ARM (WoA)](https://github.com/llvm/llvm-project/releases/download/llvmorg-15.0.6/LLVM-15.0.6-woa64.exe) - > **NOTE**: After installation, make sure `clang-cl` compiler is available from `PATH`. + > **NOTE**: After installation, make sure `clang-cl` compiler is available from `PATH`. ## How to build -> **NOTE**: By default, the build enables the OpenVINO Runtime GPU plugin to infer models on your Intel® Processor Graphics. This requires you to download and install the Intel® Graphics Driver for Windows (26.20) [driver package](https://www.intel.com/content/www/us/en/download/19344/intel-graphics-windows-dch-drivers.html) before running the build. If you don't want to use the GPU plugin, use the `-DENABLE_INTEL_GPU=OFF` CMake build option and skip the installation of the Intel® Graphics Driver. +> **NOTE**: By default, the build enables the OpenVINO Runtime GPU plugin to infer models on your Intel® Processor Graphics. This requires you to download and install the [Intel® Graphics Driver for Windows](https://www.intel.com/content/www/us/en/download/19344/intel-graphics-windows-dch-drivers.html) before running the build. If you don't want to use the GPU plugin, use the `-DENABLE_INTEL_GPU=OFF` CMake build option and skip the installation of the Intel® Graphics Driver. 1. Clone submodules: ```sh @@ -37,12 +37,12 @@ Supported configurations: ```sh cmake -G "Visual Studio 17 2022" ``` - + > **HINT**: **Generating PDB Files and Debugging Your Build**
> If you intend to generate PDB files and debug your build, it is essential to set the CMake build type appropriately. > Use one of the following CMake build type options:
>* `-DCMAKE_BUILD_TYPE=RelWithDebInfo`: This option generates PDB files with release information, making it suitable for debugging optimized builds.
- >* `-DCMAKE_BUILD_TYPE=Debug`: This option generates PDB files optimized for debugging, providing comprehensive debugging information. + >* `-DCMAKE_BUILD_TYPE=Debug`: This option generates PDB files optimized for debugging, providing comprehensive debugging information. 4. Build generated solution in Visual Studio or run `cmake --build . --config Release --verbose -j` to build from the command line. View the number of available processing units with `WMIC cpu get numberofLogicalProcessors`. Be aware that this process may take some time. diff --git a/docs/dev/conditional_compilation.md b/docs/dev/conditional_compilation.md index 47ac0190be8d9f..64bfdb66ac18bd 100644 --- a/docs/dev/conditional_compilation.md +++ b/docs/dev/conditional_compilation.md @@ -77,6 +77,7 @@ The differences are only in the code usage analysis step. The analysis step shou ## Limitations * Currently, Ninja build system is not supported for the conditional compilation build. +* Currently, conditional compilation supports OpenVINO IR models only. To enable conditional compilation, the models from alternative frameworks (PyTorch, TensorFlow, ONNX, etc.) need to be converted to OpenVINO IR models first. ## See also * [OpenVINO™ README](../../README.md) diff --git a/docs/dev/index.md b/docs/dev/index.md index a07f9c30efaa22..cef96f4aa1003e 100644 --- a/docs/dev/index.md +++ b/docs/dev/index.md @@ -97,7 +97,6 @@ The OpenVINO Repository includes the following components. Click on the componen -src\core\README.md ### OpenVINO Components diff --git a/docs/dev/ov_dependencies_24_2.txt b/docs/dev/ov_dependencies.txt similarity index 99% rename from docs/dev/ov_dependencies_24_2.txt rename to docs/dev/ov_dependencies.txt index 0f24303ec47d0c..151be528f3519c 100644 --- a/docs/dev/ov_dependencies_24_2.txt +++ b/docs/dev/ov_dependencies.txt @@ -1,6 +1,6 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -#This file provides a comprehensive list of all dependencies of OpenVINO 2024.2 +#This file provides a comprehensive list of all dependencies of OpenVINO 2024.3 #The file is part of the automation pipeline for posting OpenVINO IR models on the HuggingFace Hub, including OneBOM dependency checks. 
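Relating back to the conditional compilation limitation noted earlier (OpenVINO IR models only), the hedged Python sketch below shows one way to obtain an IR file first; `model.onnx` and the output path are placeholders, and the same `ov.convert_model`/`ov.save_model` flow applies to PyTorch or TensorFlow sources as well.

```python
import openvino as ov

# Convert a framework model (an ONNX file here, as a placeholder) to an
# in-memory OpenVINO model, then serialize it to IR (.xml + .bin).
ov_model = ov.convert_model("model.onnx")
ov.save_model(ov_model, "model_ir/model.xml")
```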
diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index f40a2a2ff9705a..f670e7fd4072b8 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -6,7 +6,7 @@ repo_owner = "openvinotoolkit" repo_name = "openvino_notebooks" repo_branch = "tree/main" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20240619220807/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20240712220852/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] notebooks_repo = "https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/" notebooks_binder = "https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=" diff --git a/docs/notebooks/3D-pose-estimation-with-output.rst b/docs/notebooks/3D-pose-estimation-with-output.rst index fc8fe233d35788..05c56d0ad4a5a9 100644 --- a/docs/notebooks/3D-pose-estimation-with-output.rst +++ b/docs/notebooks/3D-pose-estimation-with-output.rst @@ -26,8 +26,8 @@ been using JupyterLab to run the demo as suggested in the recommended browser on one of the following operating systems:* *Ubuntu, Windows: Chrome* *macOS: Safari* -Table of contents: -^^^^^^^^^^^^^^^^^^ +**Table of contents:** + - `Prerequisites <#prerequisites>`__ - `Imports <#imports>`__ @@ -52,7 +52,7 @@ Prerequisites -**The** ``pythreejs`` **extension may not display properly when using a +**The ``pythreejs`` extension may not display properly when using a Jupyter Notebook release. Therefore, it is recommended to use Jupyter Lab instead.** @@ -74,61 +74,61 @@ Lab instead.** Using cached https://download.pytorch.org/whl/cpu/torch-2.3.1%2Bcpu-cp38-cp38-linux_x86_64.whl (190.4 MB) Collecting onnx Using cached onnx-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB) - Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.3) + Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.3) Collecting ipydatawidgets>=1.1.1 (from pythreejs) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata (1.4 kB) Collecting numpy (from pythreejs) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB) - Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) - Requirement already satisfied: defusedxml>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (0.7.1) + Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) + Requirement already satisfied: defusedxml>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (0.7.1) Collecting networkx<=3.1.0 (from openvino-dev>=2024.0.0) Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) Collecting openvino-telemetry>=2023.2.1 (from openvino-dev>=2024.0.0) 
Using cached openvino_telemetry-2024.1.0-py3-none-any.whl.metadata (2.3 kB) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (24.1) - Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (6.0.1) - Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (2.32.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (24.1) + Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (6.0.1) + Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (2.32.0) Collecting openvino==2024.2.0 (from openvino-dev>=2024.0.0) Using cached openvino-2024.2.0-15519-cp38-cp38-manylinux2014_x86_64.whl.metadata (8.9 kB) Collecting filelock (from torch) - Downloading filelock-3.15.3-py3-none-any.whl.metadata (2.9 kB) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Using cached filelock-3.15.4-py3-none-any.whl.metadata (2.9 kB) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) Collecting sympy (from torch) - Using cached sympy-1.12.1-py3-none-any.whl.metadata (12 kB) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Using cached sympy-1.13.0-py3-none-any.whl.metadata (12 kB) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) Collecting fsspec (from torch) - Using cached fsspec-2024.6.0-py3-none-any.whl.metadata (11 kB) + Using cached fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB) Collecting protobuf>=3.20.2 (from onnx) - Using cached protobuf-5.27.1-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes) + Using cached protobuf-5.27.2-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes) Collecting traittypes>=0.2.0 (from ipydatawidgets>=1.1.1->pythreejs) Using cached traittypes-0.2.1-py2.py3-none-any.whl.metadata (1.0 kB) - Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) - Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
ipywidgets>=7.2.1->pythreejs) (8.12.3) - Requirement already satisfied: widgetsnbextension~=4.0.11 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.11) - Requirement already satisfied: jupyterlab-widgets~=3.0.11 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.11) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.3.2) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.7) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2.2.2) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2024.6.2) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) - Collecting mpmath<1.4.0,>=1.1.0 (from sympy->torch) + Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) + Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) + Requirement already satisfied: widgetsnbextension~=4.0.11 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.11) + Requirement already satisfied: jupyterlab-widgets~=3.0.11 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.11) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.3.2) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.7) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2.2.2) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
requests>=2.25.1->openvino-dev>=2024.0.0) (2024.7.4) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Collecting mpmath<1.4,>=1.1.0 (from sympy->torch) Using cached https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB) - Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) - Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) - Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) - Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) - Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) - Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.47) - Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) - Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) - Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) - Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) - Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) - Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) - Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) 
(2.0.1) - Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) - Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.2) - Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) + Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) + Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) + Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) + Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) + Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) + Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.47) + Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) + Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) + Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) + Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) + Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) + Requirement already satisfied: wcwidth in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) + Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.0.1) + Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) + Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.2) + Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) Using cached pythreejs-2.4.2-py3-none-any.whl (3.4 MB) Using cached openvino_dev-2024.2.0-15519-py3-none-any.whl (4.7 MB) Using cached openvino-2024.2.0-15519-cp38-cp38-manylinux2014_x86_64.whl (38.7 MB) @@ -138,13 +138,13 @@ Lab instead.** Using cached networkx-3.1-py3-none-any.whl (2.1 MB) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB) Using cached openvino_telemetry-2024.1.0-py3-none-any.whl (23 kB) - Using cached protobuf-5.27.1-cp38-abi3-manylinux2014_x86_64.whl (309 kB) - Downloading filelock-3.15.3-py3-none-any.whl (16 kB) - Using cached fsspec-2024.6.0-py3-none-any.whl (176 kB) - Using cached sympy-1.12.1-py3-none-any.whl (5.7 MB) + Using cached protobuf-5.27.2-cp38-abi3-manylinux2014_x86_64.whl (309 kB) + Using cached filelock-3.15.4-py3-none-any.whl (16 kB) + Using cached fsspec-2024.6.1-py3-none-any.whl (177 kB) + Using cached sympy-1.13.0-py3-none-any.whl (6.2 MB) Using cached traittypes-0.2.1-py2.py3-none-any.whl (8.6 kB) Installing collected packages: openvino-telemetry, mpmath, traittypes, sympy, protobuf, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, onnx, openvino-dev, ipydatawidgets, pythreejs - Successfully installed filelock-3.15.3 fsspec-2024.6.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 onnx-1.16.1 opencv-python-4.10.0.84 openvino-2024.2.0 openvino-dev-2024.2.0 openvino-telemetry-2024.1.0 protobuf-5.27.1 pythreejs-2.4.2 sympy-1.12.1 torch-2.3.1+cpu traittypes-0.2.1 + Successfully installed filelock-3.15.4 fsspec-2024.6.1 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 onnx-1.16.1 opencv-python-4.10.0.84 openvino-2024.2.0 openvino-dev-2024.2.0 openvino-telemetry-2024.1.0 protobuf-5.27.2 pythreejs-2.4.2 sympy-1.13.0 torch-2.3.1+cpu traittypes-0.2.1 Note: you may need to restart the kernel to use updated packages. @@ -252,19 +252,19 @@ IR format. .. 
parsed-literal:: ========== Converting human-pose-estimation-3d-0001 to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=model/public/human-pose-estimation-3d-0001 --model-name=PoseEstimationWithMobileNet --model-param=is_convertible_by_mo=True --import-module=model --weights=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.pth --input-shape=1,3,256,448 --input-names=data --output-names=features,heatmaps,pafs --output-file=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx + Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=model/public/human-pose-estimation-3d-0001 --model-name=PoseEstimationWithMobileNet --model-param=is_convertible_by_mo=True --import-module=model --weights=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.pth --input-shape=1,3,256,448 --input-names=data --output-names=features,heatmaps,pafs --output-file=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx ONNX check passed successfully. ========== Converting human-pose-estimation-3d-0001 to IR (FP32) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/human-pose-estimation-3d-0001/FP32 --model_name=human-pose-estimation-3d-0001 --input=data '--mean_values=data[128.0,128.0,128.0]' '--scale_values=data[255.0,255.0,255.0]' --output=features,heatmaps,pafs --input_model=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 256, 448]' --compress_to_fp16=False + Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/human-pose-estimation-3d-0001/FP32 --model_name=human-pose-estimation-3d-0001 --input=data '--mean_values=data[128.0,128.0,128.0]' '--scale_values=data[255.0,255.0,255.0]' --output=features,heatmaps,pafs --input_model=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 256, 448]' --compress_to_fp16=False [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html [ SUCCESS ] Generated IR version 11 model. 
- [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.bin diff --git a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst index 19c23d62bf48f2..af821793889b06 100644 --- a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst +++ b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst @@ -22,8 +22,8 @@ segmentation, to scene semantic parsing. It is highly efficient and effective, showing strong performance on par or even better than state of the art. -Table of contents: -^^^^^^^^^^^^^^^^^^ +**Table of contents:** + - `Imports <#imports>`__ - `Prepare the Model <#prepare-the-model>`__ @@ -36,9 +36,9 @@ Table of contents: .. code:: ipython3 import platform - + %pip install -q "openvino>=2023.1.0" "tqdm" - + if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" else: @@ -63,15 +63,15 @@ Imports import numpy as np import matplotlib.pyplot as plt import openvino as ov - + # Fetch `notebook_utils` module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import download_file Prepare the Model @@ -110,9 +110,9 @@ API, see this .. 
code:: ipython3 ir_model_xml = onnx_model_path.with_suffix(".xml") - + core = ov.Core() - + if not ir_model_xml.exists(): # Convert model to OpenVINO Model model = ov.convert_model(onnx_model_path) @@ -132,37 +132,37 @@ Data Processing Module def load_data(point_file: Union[str, Path]): """ Load the point cloud data and convert it to ndarray - + Parameters: point_file: string, path of .pts data Returns: point_set: point clound represented in np.array format """ - + point_set = np.loadtxt(point_file).astype(np.float32) - + # normailization point_set = point_set - np.expand_dims(np.mean(point_set, axis=0), 0) # center dist = np.max(np.sqrt(np.sum(point_set**2, axis=1)), 0) point_set = point_set / dist # scale - + return point_set - - + + def visualize(point_set: np.ndarray): """ Create a 3D view for data visualization - + Parameters: point_set: np.ndarray, the coordinate data in X Y Z format """ - + fig = plt.figure(dpi=192, figsize=(4, 4)) ax = fig.add_subplot(111, projection="3d") X = point_set[:, 0] Y = point_set[:, 2] Z = point_set[:, 1] - + # Scale the view of each axis to adapt to the coordinate data distribution max_range = np.array([X.max() - X.min(), Y.max() - Y.min(), Z.max() - Z.min()]).max() * 0.5 mid_x = (X.max() + X.min()) * 0.5 @@ -171,12 +171,12 @@ Data Processing Module ax.set_xlim(mid_x - max_range, mid_x + max_range) ax.set_ylim(mid_y - max_range, mid_y + max_range) ax.set_zlim(mid_z - max_range, mid_z + max_range) - + plt.tick_params(labelsize=5) ax.set_xlabel("X", fontsize=10) ax.set_ylabel("Y", fontsize=10) ax.set_zlabel("Z", fontsize=10) - + return ax Visualize the original 3D data @@ -196,7 +196,7 @@ chair for example. "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/pts/chair.pts", directory="data", ) - + points = load_data(str(point_data)) X = points[:, 0] Y = points[:, 2] @@ -216,7 +216,7 @@ chair for example. .. parsed-literal:: - /tmp/ipykernel_1006815/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_113341/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored ax.scatter3D(X, Y, Z, s=5, cmap="jet", marker="o", label="chair") @@ -239,11 +239,11 @@ each input point. # Parts of a chair classes = ["back", "seat", "leg", "arm"] - + # Preprocess the input data point = points.transpose(1, 0) point = np.expand_dims(point, axis=0) - + # Print info about model input and output shape print(f"input shape: {model.input(0).partial_shape}") print(f"output shape: {model.output(0).partial_shape}") @@ -265,14 +265,14 @@ select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 import ipywidgets as widgets - + device = widgets.Dropdown( options=core.available_devices + ["AUTO"], value="AUTO", description="Device:", disabled=False, ) - + device @@ -290,7 +290,7 @@ select device from dropdown list for running inference using OpenVINO compiled_model = core.compile_model(model=model, device_name=device.value) output_layer = compiled_model.output(0) result = compiled_model([point])[output_layer] - + # Find the label map for all points of chair with highest confidence pred = np.argmax(result[0], axis=1) ax = visualize(point) @@ -306,10 +306,10 @@ select device from dropdown list for running inference using OpenVINO XCur = np.array(XCur) YCur = np.array(YCur) ZCur = np.array(ZCur) - + # add current point of the part ax.scatter(XCur, YCur, ZCur, s=5, cmap="jet", marker="o", label=classes[i]) - + ax.set_title("3D Segmentation Visualization") plt.legend(loc="upper right", fontsize=8) plt.show() @@ -317,7 +317,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - /tmp/ipykernel_1006815/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_113341/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored ax.scatter(XCur, YCur, ZCur, s=5, cmap="jet", marker="o", label=classes[i]) diff --git a/docs/notebooks/action-recognition-webcam-with-output.rst b/docs/notebooks/action-recognition-webcam-with-output.rst index 3b3455ba9b487a..8fae9c85aa41ab 100644 --- a/docs/notebooks/action-recognition-webcam-with-output.rst +++ b/docs/notebooks/action-recognition-webcam-with-output.rst @@ -35,8 +35,8 @@ Transformer and `ResNet34 `__. -Table of contents: -^^^^^^^^^^^^^^^^^^ +**Table of contents:** + - `Imports <#imports>`__ - `The models <#the-models>`__ @@ -76,18 +76,18 @@ Imports import os import time from typing import Tuple, List - + from pathlib import Path - + import cv2 import numpy as np from IPython import display import openvino as ov from openvino.runtime.ie_api import CompiledModel - + # Fetch `notebook_utils` module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) @@ -132,7 +132,7 @@ and the system automatically downloads the two models model_path_encoder = f"model/intel/{model_name}/{model_name}-encoder/{precision}/{model_name}-encoder.xml" encoder_url = f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/temp/{model_name}/{model_name}-encoder/{precision}/{model_name}-encoder.xml" decoder_url = f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/temp/{model_name}/{model_name}-decoder/{precision}/{model_name}-decoder.xml" - + if not os.path.exists(model_path_decoder): utils.download_ir_model(decoder_url, Path(model_path_decoder).parent) if not os.path.exists(model_path_encoder): @@ -171,10 +171,10 @@ also provides the text file embedded into this notebook. "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/text/kinetics.txt", directory="data", ) - + with vocab_file_path.open(mode="r") as f: labels = [line.strip() for line in f] - + print(labels[0:9], np.shape(labels)) @@ -211,7 +211,7 @@ Select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 import ipywidgets as widgets - + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -219,7 +219,7 @@ Select device from dropdown list for running inference using OpenVINO description="Device:", disabled=False, ) - + device @@ -240,13 +240,13 @@ Model Initialization function # Initialize OpenVINO Runtime. core = ov.Core() - - + + def model_init(model_path: str, device: str) -> Tuple: """ Read the network and weights from a file, load the model on CPU and get input and output names of nodes - + :param: model: model architecture path *.xml device: inference device @@ -255,7 +255,7 @@ Model Initialization function input_key: Input node for model output_key: Output node for model """ - + # Read the network and corresponding weights from a file. model = core.read_model(model=model_path) # Compile the model for specified device. @@ -276,7 +276,7 @@ Initialization for Encoder and Decoder input_key_en, output_keys_en, compiled_model_en = model_init(model_path_encoder, device.value) # Decoder initialization input_key_de, output_keys_de, compiled_model_de = model_init(model_path_decoder, device.value) - + # Get input size - Encoder. height_en, width_en = list(input_key_en.shape)[2:] # Get input size - Decoder. @@ -303,7 +303,7 @@ frames: def center_crop(frame: np.ndarray) -> np.ndarray: """ Center crop squared the original frame to standardize the input image to the encoder model - + :param frame: input frame :returns: center-crop-squared frame """ @@ -313,12 +313,12 @@ frames: start_y = int((img_h - min_dim) / 2.0) roi = [start_y, (start_y + min_dim), start_x, (start_x + min_dim)] return frame[start_y : (start_y + min_dim), start_x : (start_x + min_dim), ...], roi - - + + def adaptive_resize(frame: np.ndarray, size: int) -> np.ndarray: """ The frame going to be resized to have a height of size or a width of size - + :param frame: input frame :param size: input size to encoder model :returns: resized frame, np.array type @@ -329,12 +329,12 @@ frames: if w_scaled == w and h_scaled == h: return frame return cv2.resize(frame, (w_scaled, h_scaled)) - - + + def decode_output(probs: np.ndarray, labels: np.ndarray, top_k: int = 3) -> np.ndarray: """ Decodes top probabilities into corresponding label names - + :param probs: confidence vector for 400 actions :param labels: list of actions :param top_k: The k most probable positions in the list of labels @@ -347,18 +347,18 @@ frames: top_probs = np.array(probs)[0][top_ind.astype(int)] decoded_top_probs = [top_probs[0][0], top_probs[0][1], top_probs[0][2]] return decoded_labels, decoded_top_probs - - + + def rec_frame_display(frame: np.ndarray, roi) -> np.ndarray: """ Draw a rec frame over actual frame - + :param frame: input frame :param roi: Region of interest, image section processed by the Encoder :returns: frame with drawed shape - + """ - + cv2.line(frame, (roi[2] + 3, roi[0] + 3), (roi[2] + 3, roi[0] + 100), (0, 200, 0), 2) cv2.line(frame, (roi[2] + 3, roi[0] + 3), (roi[2] + 100, roi[0] + 3), (0, 200, 0), 2) cv2.line(frame, (roi[3] - 3, roi[1] - 3), (roi[3] - 3, roi[1] - 100), (0, 200, 0), 2) @@ -377,16 +377,16 @@ frames: cv2.putText(frame, "ROI", org2, FONT_STYLE, FONT_SIZE, FONT_COLOR2) cv2.putText(frame, "ROI", org, FONT_STYLE, FONT_SIZE, FONT_COLOR) return frame - - + + def display_text_fnc(frame: np.ndarray, display_text: str, index: int): """ Include a text on the analyzed frame - + :param frame: input frame :param display_text: text to add on the frame :param index: index line dor adding text 
- + """ # Configuration for displaying images with text. FONT_COLOR = (255, 255, 255) @@ -427,7 +427,7 @@ Following the pipeline above, you will use the next functions to: and cropped, centered, and squared so that both width and height have lengths "size". The frame must be transposed from Height-Width-Channels (HWC) to Channels-Height-Width (CHW). - + :param frame: input frame :param size: input size to encoder model :returns: resized and cropped frame @@ -439,25 +439,25 @@ Following the pipeline above, you will use the next functions to: # Transpose frame HWC -> CHW preprocessed = preprocessed.transpose((2, 0, 1))[None,] # HWC -> CHW return preprocessed, roi - - + + def encoder(preprocessed: np.ndarray, compiled_model: CompiledModel) -> List: """ Encoder Inference per frame. This function calls the network previously configured for the encoder model (compiled_model), extracts the data from the output node, and appends it in an array to be used by the decoder. - + :param: preprocessed: preprocessing frame :param: compiled_model: Encoder model network :returns: encoder_output: embedding layer that is appended with each arriving frame """ output_key_en = compiled_model.output(0) - + # Get results on action-recognition-0001-encoder model infer_result_encoder = compiled_model([preprocessed])[output_key_en] return infer_result_encoder - - + + def decoder(encoder_output: List, compiled_model_de: CompiledModel) -> List: """ Decoder inference per set of frames. This function concatenates the embedding layer @@ -465,7 +465,7 @@ Following the pipeline above, you will use the next functions to: Calls the network previously configured for the decoder model (compiled_model_de), extracts the logits and normalize those to get confidence values along specified axis. Decodes top probabilities into corresponding label names - + :param: encoder_output: embedding layer for 16 frames :param: compiled_model_de: Decoder model network :returns: decoded_labels: The k most probable actions from the labels list @@ -484,8 +484,8 @@ Following the pipeline above, you will use the next functions to: # Decodes top probabilities into corresponding label names decoded_labels, decoded_top_probs = decode_output(probs, labels, top_k=3) return decoded_labels, decoded_top_probs - - + + def softmax(x: np.ndarray) -> np.ndarray: """ Normalizes logits to get confidence values along specified axis @@ -526,13 +526,13 @@ either a webcam or a video file. See the list of procedures below: 4. Encoder Inference per frame 5. Decoder inference per set of frames 6. Visualize the results - + :param: source: webcam "0" or video path :param: flip: to be used by VideoPlayer function for flipping capture image :param: use_popup: False for showing encoded frames over this notebook, True for creating a popup window. :param: skip_first_frames: Number of frames to skip at the beginning of the video. :returns: display video over the notebook or in a popup window - + """ size = height_en # Endoder input size - From Cell 5_9 sample_duration = frames2decode # Decoder input size - From Cell 5_7 @@ -547,7 +547,7 @@ either a webcam or a video file. See the list of procedures below: if use_popup: title = "Press ESC to Exit" cv2.namedWindow(title, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE) - + processing_times = collections.deque() processing_time = 0 encoder_output = [] @@ -557,56 +557,56 @@ either a webcam or a video file. See the list of procedures below: # Create a text template to show inference results over video. 
text_inference_template = "Infer Time:{Time:.1f}ms,{fps:.1f}FPS" text_template = "{label},{conf:.2f}%" - + while True: counter = counter + 1 - + # Read a frame from the video stream. frame = player.next() if frame is None: print("Source ended") break - + scale = 1280 / max(frame.shape) - + # Adaptative resize for visualization. if scale < 1: frame = cv2.resize(frame, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA) - + # Select one frame every two for processing through the encoder. # After 16 frames are processed, the decoder will find the action, # and the label will be printed over the frames. - + if counter % 2 == 0: # Preprocess frame before Encoder. (preprocessed, _) = preprocessing(frame, size) - + # Measure processing time. start_time = time.time() - + # Encoder Inference per frame encoder_output.append(encoder(preprocessed, compiled_model_en)) - + # Decoder inference per set of frames # Wait for sample duration to work with decoder model. if len(encoder_output) == sample_duration: decoded_labels, decoded_top_probs = decoder(encoder_output, compiled_model_de) encoder_output = [] - + # Inference has finished. Display the results. stop_time = time.time() - + # Calculate processing time. processing_times.append(stop_time - start_time) - + # Use processing times from last 200 frames. if len(processing_times) > 200: processing_times.popleft() - + # Mean processing time [ms] processing_time = np.mean(processing_times) * 1000 fps = 1000 / processing_time - + # Visualize the results. for i in range(0, 3): display_text = text_template.format( @@ -614,10 +614,10 @@ either a webcam or a video file. See the list of procedures below: conf=decoded_top_probs[i] * 100, ) display_text_fnc(frame, display_text, i) - + display_text = text_inference_template.format(Time=processing_time, fps=fps) display_text_fnc(frame, display_text, 3) - + # Use this workaround if you experience flickering. if use_popup: cv2.imshow(title, frame) @@ -633,7 +633,7 @@ either a webcam or a video file. See the list of procedures below: # Display the image in this notebook. display.clear_output(wait=True) display.display(i) - + # ctrl-c except KeyboardInterrupt: print("Interrupted") @@ -671,10 +671,10 @@ multi-camera systems). .. 
code:: ipython3 USE_WEBCAM = False - + cam_id = 0 video_file = "https://archive.org/serve/ISSVideoResourceLifeOnStation720p/ISS%20Video%20Resource_LifeOnStation_720p.mp4" - + source = cam_id if USE_WEBCAM else video_file additional_options = {"skip_first_frames": 600, "flip": False} if not USE_WEBCAM else {"flip": True} run_action_recognition(source=source, use_popup=False, **additional_options) diff --git a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png index c504ef5bf83bf4..758fc8d9ce5bcc 100644 --- a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png +++ b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d012f16703650c5879a6b9393860504b16b8515f1ed19faff89c314e5e129ab7 -size 69133 +oid sha256:cddaebbb286d5ba5fef68949bc74b271445d21e3eecdcad88b3563ef350bd93f +size 68589 diff --git a/docs/notebooks/all_notebooks_paths.txt b/docs/notebooks/all_notebooks_paths.txt index 8a7ff229db33f3..b5e54584e99d95 100644 --- a/docs/notebooks/all_notebooks_paths.txt +++ b/docs/notebooks/all_notebooks_paths.txt @@ -12,12 +12,11 @@ notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb notebooks/clip-zero-shot-image-classification/clip-zero-shot-classification.ipynb notebooks/controlnet-stable-diffusion/controlnet-stable-diffusion.ipynb notebooks/convert-to-openvino/convert-to-openvino.ipynb -notebooks/convert-to-openvino/legacy-mo-convert-to-openvino.ipynb notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb notebooks/ct-segmentation-quantize/ct-segmentation-quantize-nncf.ipynb notebooks/ddcolor-image-colorization/ddcolor-image-colorization.ipynb -notebooks/decidiffusion-image-generation/decidiffusion-image-generation.ipynb notebooks/depth-anything/depth-anything.ipynb +notebooks/depth-anything/depth-anything-v2.ipynb notebooks/detectron2-to-openvino/detectron2-to-openvino.ipynb notebooks/distilbert-sequence-classification/distilbert-sequence-classification.ipynb notebooks/distil-whisper-asr/distil-whisper-asr.ipynb @@ -25,6 +24,7 @@ notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb notebooks/dynamicrafter-animating-images/dynamicrafter-animating-images.ipynb notebooks/efficient-sam/efficient-sam.ipynb notebooks/encodec-audio-compression/encodec-audio-compression.ipynb +notebooks/explainable-ai-1-basic/explainable-ai-1-basic.ipynb notebooks/fast-segment-anything/fast-segment-anything.ipynb notebooks/film-slowmo/film-slowmo.ipynb notebooks/freevc-voice-conversion/freevc-voice-conversion.ipynb @@ -42,6 +42,7 @@ notebooks/image-bind/image-bind.ipynb notebooks/image-classification-quantization/image-classification-quantization.ipynb notebooks/instant-id/instant-id.ipynb notebooks/instruct-pix2pix-image-editing/instruct-pix2pix-image-editing.ipynb +notebooks/jina-clip/jina-clip.ipynb notebooks/knowledge-graphs-conve/knowledge-graphs-conve.ipynb notebooks/kosmos2-multimodal-large-language-model/kosmos2-multimodal-large-language-model.ipynb notebooks/language-quantize-bert/language-quantize-bert.ipynb @@ -51,13 +52,16 @@ notebooks/latent-consistency-models-image-generation/lcm-lora-controlnet.ipynb notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb 
notebooks/llava-multimodal-chatbot/videollava-multimodal-chatbot.ipynb notebooks/llava-next-multimodal-chatbot/llava-next-multimodal-chatbot.ipynb -notebooks/llm-agent-langchain/llm-agent-langchain.ipynb +notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb +notebooks/llm-agent-react/llm-agent-react-langchain.ipynb notebooks/llm-chatbot/llm-chatbot-generate-api.ipynb notebooks/llm-chatbot/llm-chatbot.ipynb notebooks/llm-question-answering/llm-question-answering.ipynb notebooks/llm-rag-langchain/llm-rag-langchain.ipynb +notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb notebooks/magika-content-type-recognition/magika-content-type-recognition.ipynb notebooks/meter-reader/meter-reader.ipynb +notebooks/minicpm-v-multimodal-chatbot/minicpm-v-multimodal-chatbot.ipynb notebooks/mms-massively-multilingual-speech/mms-massively-multilingual-speech.ipynb notebooks/mobileclip-video-search/mobileclip-video-search.ipynb notebooks/mobilevlm-language-assistant/mobilevlm-language-assistant.ipynb @@ -80,8 +84,8 @@ notebooks/person-counting-webcam/person-counting.ipynb notebooks/person-tracking-webcam/person-tracking.ipynb notebooks/photo-maker/photo-maker.ipynb notebooks/pix2struct-docvqa/pix2struct-docvqa.ipynb +notebooks/pixart/pixart.ipynb notebooks/pose-estimation-webcam/pose-estimation.ipynb -notebooks/pyannote-speaker-diarization/pyannote-speaker-diarization.ipynb notebooks/pytorch-post-training-quantization-nncf/pytorch-post-training-quantization-nncf.ipynb notebooks/pytorch-quantization-aware-training/pytorch-quantization-aware-training.ipynb notebooks/pytorch-quantization-sparsity-aware-training/pytorch-quantization-sparsity-aware-training.ipynb @@ -102,9 +106,7 @@ notebooks/sound-generation-audioldm2/sound-generation-audioldm2.ipynb notebooks/sparsity-optimization/sparsity-optimization.ipynb notebooks/speculative-sampling/speculative-sampling.ipynb notebooks/speechbrain-emotion-recognition/speechbrain-emotion-recognition.ipynb -notebooks/speech-recognition-quantization/speech-recognition-quantization-data2vec.ipynb notebooks/speech-recognition-quantization/speech-recognition-quantization-wav2vec2.ipynb -notebooks/speech-to-text/speech-to-text.ipynb notebooks/stable-cascade-image-generation/stable-cascade-image-generation.ipynb notebooks/stable-diffusion-ip-adapter/stable-diffusion-ip-adapter.ipynb notebooks/stable-diffusion-keras-cv/stable-diffusion-keras-cv.ipynb @@ -114,6 +116,7 @@ notebooks/stable-diffusion-v2/stable-diffusion-v2-infinite-zoom.ipynb notebooks/stable-diffusion-v2/stable-diffusion-v2-optimum-demo.ipynb notebooks/stable-diffusion-v2/stable-diffusion-v2-text-to-image-demo.ipynb notebooks/stable-diffusion-v2/stable-diffusion-v2-text-to-image.ipynb +notebooks/stable-diffusion-v3/stable-diffusion-v3.ipynb notebooks/stable-diffusion-xl/segmind-vegart.ipynb notebooks/stable-diffusion-xl/stable-diffusion-xl.ipynb notebooks/stable-video-diffusion/stable-video-diffusion.ipynb @@ -134,10 +137,7 @@ notebooks/triposr-3d-reconstruction/triposr-3d-reconstruction.ipynb notebooks/typo-detector/typo-detector.ipynb notebooks/vehicle-detection-and-recognition/vehicle-detection-and-recognition.ipynb notebooks/vision-background-removal/vision-background-removal.ipynb -notebooks/vision-image-colorization/vision-image-colorization.ipynb notebooks/vision-monodepth/vision-monodepth.ipynb -notebooks/vision-paddlegan-anime/vision-paddlegan-anime.ipynb -notebooks/vision-paddlegan-superresolution/vision-paddlegan-superresolution.ipynb 
notebooks/whisper-subtitles-generation/whisper-subtitles-generation.ipynb notebooks/wuerstchen-image-generation/wuerstchen-image-generation.ipynb notebooks/yolov10-optimization/yolov10-optimization.ipynb diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst index f4a4d2a52d7d7f..64214842801591 100644 --- a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst +++ b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst @@ -14,8 +14,8 @@ small parameter count and few forward pass generation process, amused can generate many images quickly. This benefit is seen particularly at larger batch sizes. -Table of contents: -^^^^^^^^^^^^^^^^^^ +**Table of contents:** + - `Prerequisites <#prerequisites>`__ - `Load and run the original @@ -65,23 +65,17 @@ Load and run the original pipeline import torch from diffusers import AmusedPipeline - - + + pipe = AmusedPipeline.from_pretrained( "amused/amused-256", ) - + prompt = "kind smiling ghost" image = pipe(prompt, generator=torch.Generator("cpu").manual_seed(8)).images[0] image.save("text2image_256.png") -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/transformers/transformer_2d.py:34: FutureWarning: `Transformer2DModelOutput` is deprecated and will be removed in version 1.0.0. Importing `Transformer2DModelOutput` from `diffusers.models.transformer_2d` is deprecated and this will be removed in a future version. Please use `from diffusers.models.modeling_outputs import Transformer2DModelOutput`, instead. - deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message) - - .. parsed-literal:: @@ -130,8 +124,8 @@ Define paths for converted models: .. code:: ipython3 from pathlib import Path - - + + TRANSFORMER_OV_PATH = Path("models/transformer_ir.xml") TEXT_ENCODER_OV_PATH = Path("models/text_encoder_ir.xml") VQVAE_OV_PATH = Path("models/vqvae_ir.xml") @@ -144,10 +138,10 @@ file. .. code:: ipython3 import torch - + import openvino as ov - - + + def convert(model: torch.nn.Module, xml_path: str, example_input): xml_path = Path(xml_path) if not xml_path.exists(): @@ -155,7 +149,7 @@ file. with torch.no_grad(): converted_model = ov.convert_model(model, example_input=example_input) ov.save_model(converted_model, xml_path, compress_to_fp16=False) - + # cleanup memory torch._C._jit_clear_class_registry() torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() @@ -172,17 +166,17 @@ Convert the Text Encoder def __init__(self, text_encoder): super().__init__() self.text_encoder = text_encoder - + def forward(self, input_ids=None, return_dict=None, output_hidden_states=None): outputs = self.text_encoder( input_ids=input_ids, return_dict=return_dict, output_hidden_states=output_hidden_states, ) - + return outputs.text_embeds, outputs.last_hidden_state, outputs.hidden_states - - + + input_ids = pipe.tokenizer( prompt, return_tensors="pt", @@ -190,41 +184,41 @@ Convert the Text Encoder truncation=True, max_length=pipe.tokenizer.model_max_length, ) - + input_example = { "input_ids": input_ids.input_ids, "return_dict": torch.tensor(True), "output_hidden_states": torch.tensor(True), } - + convert(TextEncoderWrapper(pipe.text_encoder), TEXT_ENCODER_OV_PATH, input_example) .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4481: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4565: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:622: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:621: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
encoder_states = () if output_hidden_states else None - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:627: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:626: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:276: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:275: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:284: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if causal_attention_mask.size() != (bsz, 1, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:316: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:315: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:650: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:649: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:653: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:652: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:745: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:744: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1230: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1231: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: @@ -239,7 +233,7 @@ Convert the U-ViT transformer def __init__(self, transformer): super().__init__() self.transformer = transformer - + def forward( self, latents=None, @@ -253,21 +247,21 @@ Convert the U-ViT transformer pooled_text_emb=pooled_text_emb, encoder_hidden_states=encoder_hidden_states, ) - - + + shape = (1, 16, 16) latents = torch.full(shape, pipe.scheduler.config.mask_token_id, dtype=torch.long) latents = torch.cat([latents] * 2) - - + + example_input = { "latents": latents, "micro_conds": torch.rand([2, 5], dtype=torch.float32), "pooled_text_emb": torch.rand([2, 768], dtype=torch.float32), "encoder_hidden_states": torch.rand([2, 77, 768], dtype=torch.float32), } - - + + pipe.transformer.eval() w_transformer = TransformerWrapper(pipe.transformer) convert(w_transformer, TRANSFORMER_OV_PATH, example_input) @@ -275,7 +269,7 @@ Convert the U-ViT transformer Convert VQ-GAN decoder (VQVAE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Function ``get_latents`` is +Function ``get_latents`` is needed to return real latents for the conversion. Due to the VQVAE implementation autogenerated tensor of the required shape is not suitable. This function repeats part of ``AmusedPipeline``. @@ -286,7 +280,7 @@ suitable. This function repeats part of ``AmusedPipeline``. shape = (1, 16, 16) latents = torch.full(shape, pipe.scheduler.config.mask_token_id, dtype=torch.long) model_input = torch.cat([latents] * 2) - + model_output = pipe.transformer( model_input, micro_conds=torch.rand([2, 5], dtype=torch.float32), @@ -296,50 +290,50 @@ suitable. This function repeats part of ``AmusedPipeline``. guidance_scale = 10.0 uncond_logits, cond_logits = model_output.chunk(2) model_output = uncond_logits + guidance_scale * (cond_logits - uncond_logits) - + latents = pipe.scheduler.step( model_output=model_output, timestep=torch.tensor(0), sample=latents, ).prev_sample - + return latents - - + + class VQVAEWrapper(torch.nn.Module): def __init__(self, vqvae): super().__init__() self.vqvae = vqvae - + def forward(self, latents=None, force_not_quantize=True, shape=None): outputs = self.vqvae.decode( latents, force_not_quantize=force_not_quantize, shape=shape.tolist(), ) - + return outputs - - + + latents = get_latents() example_vqvae_input = { "latents": latents, "force_not_quantize": torch.tensor(True), "shape": torch.tensor((1, 16, 16, 64)), } - + convert(VQVAEWrapper(pipe.vqvae), VQVAE_OV_PATH, example_vqvae_input) .. parsed-literal:: - /tmp/ipykernel_1007584/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /tmp/ipykernel_114139/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! shape=shape.tolist(), - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not force_not_quantize: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -353,8 +347,8 @@ Select device from dropdown list for running inference using OpenVINO. .. code:: ipython3 import ipywidgets as widgets - - + + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -362,7 +356,7 @@ Select device from dropdown list for running inference using OpenVINO. description="Device:", disabled=False, ) - + device @@ -387,29 +381,29 @@ wrapper classes return ``torch.Tensor``\ s instead of ``np.array``\ s. .. 
code:: ipython3 from collections import namedtuple - - + + class ConvTextEncoderWrapper(torch.nn.Module): def __init__(self, text_encoder, config): super().__init__() self.config = config self.text_encoder = text_encoder - + def forward(self, input_ids=None, return_dict=None, output_hidden_states=None): inputs = { "input_ids": input_ids, "return_dict": return_dict, "output_hidden_states": output_hidden_states, } - + outs = self.text_encoder(inputs) - + outputs = namedtuple("CLIPTextModelOutput", ("text_embeds", "last_hidden_state", "hidden_states")) - + text_embeds = torch.from_numpy(outs[0]) last_hidden_state = torch.from_numpy(outs[1]) hidden_states = list(torch.from_numpy(out) for out in outs.values())[2:] - + return outputs(text_embeds, last_hidden_state, hidden_states) .. code:: ipython3 @@ -419,7 +413,7 @@ wrapper classes return ``torch.Tensor``\ s instead of ``np.array``\ s. super().__init__() self.config = config self.transformer = transformer - + def forward(self, latents=None, micro_conds=None, pooled_text_emb=None, encoder_hidden_states=None, **kwargs): outputs = self.transformer( { @@ -430,7 +424,7 @@ wrapper classes return ``torch.Tensor``\ s instead of ``np.array``\ s. }, share_inputs=False, ) - + return torch.from_numpy(outputs[0]) .. code:: ipython3 @@ -441,17 +435,17 @@ wrapper classes return ``torch.Tensor``\ s instead of ``np.array``\ s. self.vqvae = vqvae self.dtype = dtype self.config = config - + def decode(self, latents=None, force_not_quantize=True, shape=None): inputs = { "latents": latents, "force_not_quantize": force_not_quantize, "shape": torch.tensor(shape), } - + outs = self.vqvae(inputs) outs = namedtuple("VQVAE", "sample")(torch.from_numpy(outs[0])) - + return outs And insert wrappers instances in the pipeline: @@ -459,25 +453,25 @@ And insert wrappers instances in the pipeline: .. code:: ipython3 prompt = "kind smiling ghost" - + transformer = pipe.transformer vqvae = pipe.vqvae text_encoder = pipe.text_encoder - + pipe.__dict__["_internal_dict"]["_execution_device"] = pipe._execution_device # this is to avoid some problem that can occur in the pipeline pipe.register_modules( text_encoder=ConvTextEncoderWrapper(ov_text_encoder, text_encoder.config), transformer=ConvTransformerWrapper(ov_transformer, transformer.config), vqvae=ConvVQVAEWrapper(ov_vqvae, vqvae.dtype, vqvae.config), ) - + image = pipe(prompt, generator=torch.Generator("cpu").manual_seed(8)).images[0] image.save("text2image_256.png") .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -540,7 +534,7 @@ improve model inference speed. .. 
code:: ipython3 QUANTIZED_TRANSFORMER_OV_PATH = Path(str(TRANSFORMER_OV_PATH).replace(".xml", "_quantized.xml")) - + skip_for_device = "GPU" in device.value to_quantize = widgets.Checkbox(value=not skip_for_device, description="Quantization", disabled=skip_for_device) to_quantize @@ -557,12 +551,12 @@ improve model inference speed. .. code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) open("skip_kernel_extension.py", "w").write(r.text) - + %load_ext skip_kernel_extension Prepare calibration dataset @@ -578,42 +572,42 @@ model inputs for calibration we customize ``CompiledModel``. .. code:: ipython3 %%skip not $to_quantize.value - + import datasets from tqdm.auto import tqdm from typing import Any, Dict, List import pickle import numpy as np - - + + def disable_progress_bar(pipeline, disable=True): if not hasattr(pipeline, "_progress_bar_config"): pipeline._progress_bar_config = {'disable': disable} else: pipeline._progress_bar_config['disable'] = disable - - + + class CompiledModelDecorator(ov.CompiledModel): def __init__(self, compiled_model: ov.CompiledModel, data_cache: List[Any] = None, keep_prob: float = 0.5): super().__init__(compiled_model) self.data_cache = data_cache if data_cache is not None else [] self.keep_prob = keep_prob - + def __call__(self, *args, **kwargs): if np.random.rand() <= self.keep_prob: self.data_cache.append(*args) return super().__call__(*args, **kwargs) - - + + def collect_calibration_data(ov_transformer_model, calibration_dataset_size: int) -> List[Dict]: calibration_dataset_filepath = Path(f"calibration_data/{calibration_dataset_size}.pkl") if not calibration_dataset_filepath.exists(): calibration_data = [] pipe.transformer.transformer = CompiledModelDecorator(ov_transformer_model, calibration_data, keep_prob=1.0) disable_progress_bar(pipe) - + dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", split="train", trust_remote_code=True).shuffle(seed=42) - + # Run inference for data collection pbar = tqdm(total=calibration_dataset_size) for batch in dataset: @@ -624,14 +618,14 @@ model inputs for calibration we customize ``CompiledModel``. pbar.update(len(calibration_data) - pbar.n) if pbar.n >= calibration_dataset_size: break - + pipe.transformer.transformer = ov_transformer_model disable_progress_bar(pipe, disable=False) - + calibration_dataset_filepath.parent.mkdir(exist_ok=True, parents=True) with open(calibration_dataset_filepath, 'wb') as f: pickle.dump(calibration_data, f) - + with open(calibration_dataset_filepath, 'rb') as f: calibration_data = pickle.load(f) return calibration_data @@ -647,14 +641,14 @@ model. .. code:: ipython3 %%skip not $to_quantize.value - + from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters from nncf.quantization.range_estimator import RangeEstimatorParameters, StatisticsCollectorParameters, StatisticsType, \ AggregatorType import nncf - + CALIBRATION_DATASET_SIZE = 12 * 25 - + if not QUANTIZED_TRANSFORMER_OV_PATH.exists(): calibration_data = collect_calibration_data(ov_transformer, CALIBRATION_DATASET_SIZE) quantized_model = nncf.quantize( @@ -694,7 +688,7 @@ model. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -705,17 +699,17 @@ model. -.. raw:: html -

 
 
 
 
+ + + + + @@ -726,17 +720,17 @@ model. -.. raw:: html -

 
 
 
 
+ + + + + @@ -756,41 +750,41 @@ model. -.. raw:: html -

 
 
 
 
+ + + + + .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/experimental/tensor/tensor.py:92: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) -.. raw:: html -
+ + + @@ -800,21 +794,21 @@ Demo generation with quantized pipeline .. code:: ipython3 %%skip not $to_quantize.value - + original_ov_transformer_model = pipe.transformer.transformer pipe.transformer.transformer = core.compile_model(QUANTIZED_TRANSFORMER_OV_PATH, device.value) - + image = pipe(prompt, generator=torch.Generator('cpu').manual_seed(8)).images[0] image.save('text2image_256_quantized.png') - + pipe.transformer.transformer = original_ov_transformer_model - + display(image) .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -845,24 +839,24 @@ a rough estimate of generation quality. .. code:: ipython3 %%skip not $to_quantize.value - + from torchmetrics.image.inception import InceptionScore from torchvision import transforms as transforms from itertools import islice import time - + VALIDATION_DATASET_SIZE = 100 - + def compute_inception_score(ov_transformer_model_path, validation_set_size, batch_size=100): original_ov_transformer_model = pipe.transformer.transformer pipe.transformer.transformer = core.compile_model(ov_transformer_model_path, device.value) - + disable_progress_bar(pipe) dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", "unlabeled", split="validation", trust_remote_code=True).shuffle(seed=42) dataset = islice(dataset, validation_set_size) - + inception_score = InceptionScore(normalize=True, splits=1) - + images = [] infer_times = [] for batch in tqdm(dataset, total=validation_set_size, desc="Computing Inception Score"): @@ -874,21 +868,21 @@ a rough estimate of generation quality. infer_times.append(time.perf_counter() - start_time) image = transforms.ToTensor()(image) images.append(image) - + mean_perf_time = sum(infer_times) / len(infer_times) - + while len(images) > 0: images_batch = torch.stack(images[-batch_size:]) images = images[:-batch_size] inception_score.update(images_batch) kl_mean, kl_std = inception_score.compute() - + pipe.transformer.transformer = original_ov_transformer_model disable_progress_bar(pipe, disable=False) - + return kl_mean, mean_perf_time - - + + original_inception_score, original_time = compute_inception_score(TRANSFORMER_OV_PATH, VALIDATION_DATASET_SIZE) print(f"Original pipeline Inception Score: {original_inception_score}") quantized_inception_score, quantized_time = compute_inception_score(QUANTIZED_TRANSFORMER_OV_PATH, VALIDATION_DATASET_SIZE) @@ -898,7 +892,7 @@ a rough estimate of generation quality. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. warnings.warn(\*args, \*\*kwargs) # noqa: B028 @@ -910,9 +904,9 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-708/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:176: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1807.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:176: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1807.) return kl.mean(), kl.std() @@ -930,7 +924,7 @@ a rough estimate of generation quality. .. parsed-literal:: Quantized pipeline Inception Score: 9.630992889404297 - Quantization speed-up: 2.10x + Quantization speed-up: 2.09x Interactive inference @@ -943,13 +937,13 @@ Below you can select which pipeline to run: original or quantized. .. code:: ipython3 quantized_model_present = QUANTIZED_TRANSFORMER_OV_PATH.exists() - + use_quantized_model = widgets.Checkbox( value=True if quantized_model_present else False, description="Use quantized pipeline", disabled=not quantized_model_present, ) - + use_quantized_model @@ -965,18 +959,18 @@ Below you can select which pipeline to run: original or quantized. 
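A minimal, non-interactive sketch of the same selection logic described above, assuming the ``core``, ``device``, ``pipe``, ``prompt`` and IR path variables from the earlier cells are still in scope (the output file name below is only illustrative):

.. code:: ipython3

    import torch

    # Prefer the quantized transformer IR when it was produced above.
    use_quantized = QUANTIZED_TRANSFORMER_OV_PATH.exists()
    transformer_path = QUANTIZED_TRANSFORMER_OV_PATH if use_quantized else TRANSFORMER_OV_PATH

    # Swap the compiled transformer inside the wrapped pipeline and generate one image.
    pipe.transformer.transformer = core.compile_model(transformer_path, device.value)
    image = pipe(prompt, generator=torch.Generator("cpu").manual_seed(8)).images[0]
    image.save("text2image_256_selected.png")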
import gradio as gr import numpy as np - + pipe.transformer.transformer = core.compile_model( QUANTIZED_TRANSFORMER_OV_PATH if use_quantized_model.value else TRANSFORMER_OV_PATH, device.value, ) - - + + def generate(prompt, seed, _=gr.Progress(track_tqdm=True)): image = pipe(prompt, generator=torch.Generator("cpu").manual_seed(seed)).images[0] return image - - + + demo = gr.Interface( generate, [ @@ -1003,7 +997,7 @@ Below you can select which pipeline to run: original or quantized. .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - + To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/animate-anyone-with-output.rst b/docs/notebooks/animate-anyone-with-output.rst index 4240a1d02cc673..3b1ec8172cd808 100644 --- a/docs/notebooks/animate-anyone-with-output.rst +++ b/docs/notebooks/animate-anyone-with-output.rst @@ -34,16 +34,17 @@ Learn more in `GitHub repo `__ and `paper `__. -.. warning:: +.. container:: alert alert-warning - This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for - inference. Changing the values of ``HEIGHT<``, ``WIDTH`` and ``VIDEO_LENGTH`` variables - will change the memory consumption but will also affect accuracy. + :: +

+        ! WARNING !
+
+    This tutorial requires at least 96 GB of RAM for model conversion and 40 GB for inference. Changing the values of HEIGHT, WIDTH and VIDEO_LENGTH variables will change the memory consumption but will also affect accuracy.
+

+**Table of contents:** -Table of contents: -^^^^^^^^^^^^^^^^^^ - `Prerequisites <#prerequisites>`__ - `Prepare base model <#prepare-base-model>`__ @@ -153,11 +154,11 @@ Note that we clone a fork of original repo with tweaked forward methods. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-707/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-707/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-707/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -290,13 +291,13 @@ Download weights .. parsed-literal:: - .gitattributes: 0%| | 0.00/1.46k [00:00 -.. raw:: html -
+ + + + + @@ -576,17 +577,17 @@ of the pipeline, it will be better to convert them to separate models. -.. raw:: html -

 
 
 
 
+ + + + + @@ -650,17 +651,17 @@ step. -.. raw:: html -

 
 
 
 
+ + + + + @@ -751,17 +752,17 @@ step. -.. raw:: html -

 
 
 
 
+ + + + + @@ -805,17 +806,17 @@ efficiently integrate pose control signals into the denoising process. -.. raw:: html -

 
 
 
 
+ + + + + @@ -844,7 +845,7 @@ required for both reference and denoising UNets. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-707/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4481: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-727/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4565: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -865,17 +866,17 @@ required for both reference and denoising UNets. -.. raw:: html -

 
 
 
 
+ + + + + @@ -1215,7 +1216,7 @@ Video post-processing .. raw:: html diff --git a/docs/notebooks/async-api-with-output.rst b/docs/notebooks/async-api-with-output.rst index 9a6bc4f791cd80..7ba7205d4eb836 100644 --- a/docs/notebooks/async-api-with-output.rst +++ b/docs/notebooks/async-api-with-output.rst @@ -11,8 +11,8 @@ device is busy with inference, the application can perform other tasks in parallel (for example, populating inputs or scheduling other requests) rather than wait for the current inference to complete first. -Table of contents: -^^^^^^^^^^^^^^^^^^ +**Table of contents:** + - `Imports <#imports>`__ - `Prepare model and data @@ -48,7 +48,7 @@ Imports .. code:: ipython3 import platform - + %pip install -q "openvino>=2023.1.0" %pip install -q opencv-python if platform.system() != "windows": @@ -72,15 +72,15 @@ Imports import openvino as ov from IPython import display import matplotlib.pyplot as plt - + # Fetch the notebook utils script from the openvino_notebooks repo import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + import notebook_utils as utils Prepare model and data processing @@ -102,7 +102,7 @@ the person in each frame of the video. # directory where model will be downloaded base_model_dir = "model" - + # model name as named in Open Model Zoo model_name = "person-detection-0202" precision = "FP16" @@ -114,13 +114,13 @@ the person in each frame of the video. .. parsed-literal:: ################|| Downloading person-detection-0202 ||################ - + ========== Downloading model/intel/person-detection-0202/FP16/person-detection-0202.xml - - + + ========== Downloading model/intel/person-detection-0202/FP16/person-detection-0202.bin - - + + Select inference device @@ -131,7 +131,7 @@ Select inference device .. code:: ipython3 import ipywidgets as widgets - + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -139,7 +139,7 @@ Select inference device description="Device:", disabled=False, ) - + device @@ -160,14 +160,14 @@ Load the model # initialize OpenVINO runtime core = ov.Core() - + # read the network and corresponding weights from file model = core.read_model(model=model_path) - + # compile the model for the CPU (you can choose manually CPU, GPU etc.) 
# or let the engine choose the best available device (AUTO) compiled_model = core.compile_model(model=model, device_name=device.value) - + # get input node input_layer_ir = model.input(0) N, C, H, W = input_layer_ir.shape @@ -183,7 +183,7 @@ Create functions for data processing def preprocess(image): """ Define the preprocess function for input data - + :param: image: the orignal input frame :returns: resized_image: the image processed @@ -193,12 +193,12 @@ Create functions for data processing resized_image = resized_image.transpose((2, 0, 1)) resized_image = np.expand_dims(resized_image, axis=0).astype(np.float32) return resized_image - - + + def postprocess(result, image, fps): """ Define the postprocess function for output data - + :param: result: the inference results image: the orignal input frame fps: average throughput calculated for each frame @@ -273,7 +273,7 @@ immediately processed: def sync_api(source, flip, fps, use_popup, skip_first_frames): """ Define the main function for video processing in sync mode - + :param: source: the video path or the ID of your webcam :returns: sync_fps: the inference throughput in sync mode @@ -352,7 +352,7 @@ Test performance in Sync Mode .. parsed-literal:: Source ended - average throuput in sync mode: 59.76 fps + average throuput in sync mode: 58.66 fps Async Mode @@ -396,7 +396,7 @@ pipeline (decoding vs inference) and not by the sum of the stages. def async_api(source, flip, fps, use_popup, skip_first_frames): """ Define the main function for video processing in async mode - + :param: source: the video path or the ID of your webcam :returns: async_fps: the inference throughput in async mode @@ -491,7 +491,7 @@ Test the performance in Async Mode .. parsed-literal:: Source ended - average throuput in async mode: 104.09 fps + average throuput in async mode: 103.49 fps Compare the performance @@ -503,20 +503,20 @@ Compare the performance width = 0.4 fontsize = 14 - + plt.rc("font", size=fontsize) fig, ax = plt.subplots(1, 1, figsize=(10, 8)) - + rects1 = ax.bar([0], sync_fps, width, color="#557f2d") rects2 = ax.bar([width], async_fps, width) ax.set_ylabel("frames per second") ax.set_xticks([0, width]) ax.set_xticklabels(["Sync mode", "Async mode"]) ax.set_xlabel("Higher is better") - + fig.suptitle("Sync mode VS Async mode") fig.tight_layout() - + plt.show() @@ -552,7 +552,7 @@ the possibility of passing runtime values. def callback(infer_request, info) -> None: """ Define the callback function for postprocessing - + :param: infer_request: the infer_request object info: a tuple includes original frame and starts time :returns: @@ -566,7 +566,7 @@ the possibility of passing runtime values. total_time = stop_time - start_time frame_number = frame_number + 1 inferqueue_fps = frame_number / total_time - + res = infer_request.get_output_tensor(0).data[0] frame = postprocess(res, frame, inferqueue_fps) # Encode numpy array to jpg @@ -582,7 +582,7 @@ the possibility of passing runtime values. def inferqueue(source, flip, fps, skip_first_frames) -> None: """ Define the main function for video processing with async infer queue - + :param: source: the video path or the ID of your webcam :retuns: None @@ -634,5 +634,5 @@ Test the performance with ``AsyncInferQueue`` .. 
parsed-literal:: - average throughput in async mode with async infer queue: 149.07 fps + average throughput in async mode with async infer queue: 149.16 fps diff --git a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png index 947168a13876a1..74f4cfe3073435 100644 --- a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png +++ b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:828b054c2b0cdfb183cd60781179cd06e288e55fe15ef96e8b8295814ac58e1d -size 29389 +oid sha256:76df94bf0d66d31a6f9d3234549a1ab188e396faf9f7bd21e307e3b3454985bd +size 29294 diff --git a/docs/notebooks/auto-device-with-output.rst b/docs/notebooks/auto-device-with-output.rst index c139bff3032350..be0ee9bef2983b 100644 --- a/docs/notebooks/auto-device-with-output.rst +++ b/docs/notebooks/auto-device-with-output.rst @@ -30,8 +30,8 @@ first inference. auto -Table of contents: -^^^^^^^^^^^^^^^^^^ +**Table of contents:** + - `Import modules and create Core <#import-modules-and-create-core>`__ - `Convert the model to OpenVINO IR @@ -69,10 +69,10 @@ Import modules and create Core .. code:: ipython3 import platform - + # Install required packages %pip install -q "openvino>=2023.1.0" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu - + if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" else: @@ -89,13 +89,13 @@ Import modules and create Core import time import sys - + import openvino as ov - + from IPython.display import Markdown, display - + core = ov.Core() - + if not any("GPU" in device for device in core.available_devices): display( Markdown( @@ -139,11 +139,11 @@ For more information about model conversion API, see this import torchvision from pathlib import Path - + base_model_dir = Path("./model") base_model_dir.mkdir(exist_ok=True) model_path = base_model_dir / "resnet50.xml" - + if not model_path.exists(): pt_model = torchvision.models.resnet50(weights="DEFAULT") ov_model = ov.convert_model(pt_model, input=[[1, 3, 224, 224]]) @@ -176,26 +176,26 @@ By default, ``compile_model`` API will select **AUTO** as # Set LOG_LEVEL to LOG_INFO. core.set_property("AUTO", {"LOG_LEVEL": "LOG_INFO"}) - + # Load the model onto the target device. compiled_model = core.compile_model(ov_model) - + if isinstance(compiled_model, ov.CompiledModel): print("Successfully compiled model without a device_name.") .. 
parsed-literal:: - [22:42:34.9458]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO - [22:42:34.9458]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY - [22:42:34.9458]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 - [22:42:34.9459]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO - [22:42:34.9459]I[plugin.cpp:426][AUTO] device:CPU, priority:0 - [22:42:34.9459]I[schedule.cpp:17][AUTO] scheduler starting - [22:42:34.9459]I[auto_schedule.cpp:134][AUTO] select device:CPU - [22:42:35.1105]I[auto_schedule.cpp:336][AUTO] Device: [CPU]: Compile model took 164.490887 ms - [22:42:35.1108]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished - [22:42:35.1109]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context + [23:26:37.1843]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO + [23:26:37.1844]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY + [23:26:37.1844]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 + [23:26:37.1844]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO + [23:26:37.1844]I[plugin.cpp:426][AUTO] device:CPU, priority:0 + [23:26:37.1844]I[schedule.cpp:17][AUTO] scheduler starting + [23:26:37.1844]I[auto_schedule.cpp:134][AUTO] select device:CPU + [23:26:37.3288]I[auto_schedule.cpp:336][AUTO] Device: [CPU]: Compile model took 144.341797 ms + [23:26:37.3290]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished + [23:26:37.3291]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context Successfully compiled model without a device_name. @@ -209,7 +209,7 @@ By default, ``compile_model`` API will select **AUTO** as .. parsed-literal:: Deleted compiled_model - [22:42:35.1221]I[schedule.cpp:308][AUTO] scheduler ending + [23:26:37.3399]I[schedule.cpp:308][AUTO] scheduler ending Explicitly pass AUTO as device_name to Core::compile_model API @@ -224,9 +224,9 @@ improve readability of your code. # Set LOG_LEVEL to LOG_NONE. core.set_property("AUTO", {"LOG_LEVEL": "LOG_NONE"}) - + compiled_model = core.compile_model(model=ov_model, device_name="AUTO") - + if isinstance(compiled_model, ov.CompiledModel): print("Successfully compiled model using AUTO.") @@ -276,25 +276,25 @@ function, we will reuse it for preparing input data. # Fetch `notebook_utils` module import requests - + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import download_file .. code:: ipython3 from PIL import Image - + # Download the image from the openvino_notebooks storage image_filename = download_file( "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg", directory="data", ) - + image = Image.open(str(image_filename)) input_transform = torchvision.models.ResNet50_Weights.DEFAULT.transforms() - + input_tensor = input_transform(image) input_tensor = input_tensor.unsqueeze(0).numpy() image @@ -325,10 +325,10 @@ Load the model to GPU device and perform inference # Start time. gpu_load_start_time = time.perf_counter() compiled_model = core.compile_model(model=ov_model, device_name="GPU") # load to GPU - + # Execute the first inference. results = compiled_model(input_tensor)[0] - + # Measure time to the first inference. 
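    # This span covers model compilation plus the first infer request, which is
    # why AUTO (which starts serving on CPU while the GPU version is still
    # compiling) usually reports a shorter first-inference time than compiling
    # directly for GPU.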
gpu_fil_end_time = time.perf_counter() gpu_fil_span = gpu_fil_end_time - gpu_load_start_time @@ -354,11 +354,11 @@ executed on CPU until GPU is ready. # Start time. auto_load_start_time = time.perf_counter() compiled_model = core.compile_model(model=ov_model) # The device_name is AUTO by default. - + # Execute the first inference. results = compiled_model(input_tensor)[0] - - + + # Measure time to the first inference. auto_fil_end_time = time.perf_counter() auto_fil_span = auto_fil_end_time - auto_load_start_time @@ -411,7 +411,7 @@ Class and callback definition :member: latency_list: Record the latency of each inference execution over @interval seconds duration. :member: interval: The metrics will be updated every @interval seconds """ - + def __init__(self, interval): """ Create and initilize one instance of class PerformanceMetrics. @@ -421,11 +421,11 @@ Class and callback definition """ self.fps = 0 self.latency = 0 - + self.start_time = time.perf_counter() self.latency_list = [] self.interval = interval - + def update(self, infer_request: ov.InferRequest) -> bool: """ Update the metrics if current ongoing @interval seconds duration is expired. Record the latency only if it is not expired. @@ -447,8 +447,8 @@ Class and callback definition return True else: return False - - + + class InferContext: """ Inference context. Record and update peforamnce metrics via @metrics, set @feed_inference to False once @remaining_update_num <=0 @@ -456,7 +456,7 @@ Class and callback definition :member: remaining_update_num: the remaining times for peforamnce metrics updating. :member: feed_inference: if feed inference request is required or not. """ - + def __init__(self, update_interval, num): """ Create and initilize one instance of class InferContext. @@ -468,7 +468,7 @@ Class and callback definition self.metrics = PerformanceMetrics(update_interval) self.remaining_update_num = num self.feed_inference = True - + def update(self, infer_request: ov.InferRequest): """ Update the context. Set @feed_inference to False if the number of remaining performance metric updates (@remaining_update_num) reaches 0 @@ -477,13 +477,13 @@ Class and callback definition """ if self.remaining_update_num <= 0: self.feed_inference = False - + if self.metrics.update(infer_request): self.remaining_update_num = self.remaining_update_num - 1 if self.remaining_update_num <= 0: self.feed_inference = False - - + + def completion_callback(infer_request: ov.InferRequest, context) -> None: """ callback for the inference request, pass the @infer_request to @context for updating @@ -492,8 +492,8 @@ Class and callback definition :returns: None """ context.update(infer_request) - - + + # Performance metrics update interval (seconds) and number of times. metrics_update_interval = 10 metrics_update_num = 6 @@ -509,29 +509,29 @@ Loop for inference and update the FPS/Latency every .. code:: ipython3 THROUGHPUT_hint_context = InferContext(metrics_update_interval, metrics_update_num) - + print("Compiling Model for AUTO device with THROUGHPUT hint") sys.stdout.flush() - + compiled_model = core.compile_model(model=ov_model, config={"PERFORMANCE_HINT": "THROUGHPUT"}) - + infer_queue = ov.AsyncInferQueue(compiled_model, 0) # Setting to 0 will query optimal number by default. 
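    # The callback registered below fires once per completed request: it receives
    # the finished InferRequest plus the userdata object passed to start_async
    # (here the InferContext created above), so postprocessing and metric updates
    # happen off the submission loop.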
infer_queue.set_callback(completion_callback) - + print(f"Start inference, {metrics_update_num: .0f} groups of FPS/latency will be measured over {metrics_update_interval: .0f}s intervals") sys.stdout.flush() - + while THROUGHPUT_hint_context.feed_inference: infer_queue.start_async(input_tensor, THROUGHPUT_hint_context) - + infer_queue.wait_all() - + # Take the FPS and latency of the latest period. THROUGHPUT_hint_fps = THROUGHPUT_hint_context.metrics.fps THROUGHPUT_hint_latency = THROUGHPUT_hint_context.metrics.latency - + print("Done") - + del compiled_model @@ -539,12 +539,12 @@ Loop for inference and update the FPS/Latency every Compiling Model for AUTO device with THROUGHPUT hint Start inference, 6 groups of FPS/latency will be measured over 10s intervals - throughput: 180.70fps, latency: 31.51ms, time interval: 10.01s - throughput: 180.34fps, latency: 32.54ms, time interval: 10.00s - throughput: 179.29fps, latency: 32.60ms, time interval: 10.01s - throughput: 180.59fps, latency: 32.42ms, time interval: 10.01s - throughput: 179.95fps, latency: 32.56ms, time interval: 10.01s - throughput: 181.03fps, latency: 32.33ms, time interval: 10.00s + throughput: 179.02fps, latency: 31.75ms, time interval: 10.02s + throughput: 179.80fps, latency: 32.59ms, time interval: 10.00s + throughput: 179.17fps, latency: 32.63ms, time interval: 10.01s + throughput: 179.81fps, latency: 32.58ms, time interval: 10.01s + throughput: 178.74fps, latency: 32.75ms, time interval: 10.00s + throughput: 179.33fps, latency: 32.57ms, time interval: 10.02s Done @@ -559,30 +559,30 @@ Loop for inference and update the FPS/Latency for each .. code:: ipython3 LATENCY_hint_context = InferContext(metrics_update_interval, metrics_update_num) - + print("Compiling Model for AUTO Device with LATENCY hint") sys.stdout.flush() - + compiled_model = core.compile_model(model=ov_model, config={"PERFORMANCE_HINT": "LATENCY"}) - + # Setting to 0 will query optimal number by default. infer_queue = ov.AsyncInferQueue(compiled_model, 0) infer_queue.set_callback(completion_callback) - + print(f"Start inference, {metrics_update_num: .0f} groups fps/latency will be out with {metrics_update_interval: .0f}s interval") sys.stdout.flush() - + while LATENCY_hint_context.feed_inference: infer_queue.start_async(input_tensor, LATENCY_hint_context) - + infer_queue.wait_all() - + # Take the FPS and latency of the latest period. 
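    # metrics.fps and metrics.latency hold the values from the most recently
    # completed measurement interval, i.e. the steady-state figures rather than
    # an average over the whole run.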
LATENCY_hint_fps = LATENCY_hint_context.metrics.fps LATENCY_hint_latency = LATENCY_hint_context.metrics.latency - + print("Done") - + del compiled_model @@ -590,12 +590,12 @@ Loop for inference and update the FPS/Latency for each Compiling Model for AUTO Device with LATENCY hint Start inference, 6 groups fps/latency will be out with 10s interval - throughput: 137.68fps, latency: 6.68ms, time interval: 10.00s - throughput: 140.68fps, latency: 6.66ms, time interval: 10.00s - throughput: 140.55fps, latency: 6.67ms, time interval: 10.00s - throughput: 140.49fps, latency: 6.67ms, time interval: 10.00s - throughput: 140.62fps, latency: 6.66ms, time interval: 10.01s - throughput: 140.67fps, latency: 6.66ms, time interval: 10.00s + throughput: 137.56fps, latency: 6.70ms, time interval: 10.00s + throughput: 140.27fps, latency: 6.69ms, time interval: 10.00s + throughput: 140.43fps, latency: 6.68ms, time interval: 10.00s + throughput: 140.33fps, latency: 6.69ms, time interval: 10.01s + throughput: 140.45fps, latency: 6.68ms, time interval: 10.00s + throughput: 140.42fps, latency: 6.68ms, time interval: 10.01s Done @@ -607,16 +607,16 @@ Difference in FPS and latency .. code:: ipython3 import matplotlib.pyplot as plt - + TPUT = 0 LAT = 1 labels = ["THROUGHPUT hint", "LATENCY hint"] - + fig1, ax1 = plt.subplots(1, 1) fig1.patch.set_visible(False) ax1.axis("tight") ax1.axis("off") - + cell_text = [] cell_text.append( [ @@ -625,7 +625,7 @@ Difference in FPS and latency ] ) cell_text.append(["%.2f%s" % (LATENCY_hint_fps, " FPS"), "%.2f%s" % (LATENCY_hint_latency, " ms")]) - + table = ax1.table( cellText=cell_text, colLabels=["FPS (Higher is better)", "Latency (Lower is better)"], @@ -640,7 +640,7 @@ Difference in FPS and latency table.auto_set_column_width(0) table.auto_set_column_width(1) table.scale(1, 3) - + fig1.tight_layout() plt.show() @@ -654,28 +654,28 @@ Difference in FPS and latency # Output the difference. 
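    # Two bar charts side by side: FPS on the left (higher is better) and
    # per-frame latency in milliseconds on the right (lower is better), one bar
    # per performance hint.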
width = 0.4 fontsize = 14 - + plt.rc("font", size=fontsize) fig, ax = plt.subplots(1, 2, figsize=(10, 8)) - + rects1 = ax[0].bar([0], THROUGHPUT_hint_fps, width, label=labels[TPUT], color="#557f2d") rects2 = ax[0].bar([width], LATENCY_hint_fps, width, label=labels[LAT]) ax[0].set_ylabel("frames per second") ax[0].set_xticks([width / 2]) ax[0].set_xticklabels(["FPS"]) ax[0].set_xlabel("Higher is better") - + rects1 = ax[1].bar([0], THROUGHPUT_hint_latency, width, label=labels[TPUT], color="#557f2d") rects2 = ax[1].bar([width], LATENCY_hint_latency, width, label=labels[LAT]) ax[1].set_ylabel("milliseconds") ax[1].set_xticks([width / 2]) ax[1].set_xticklabels(["Latency (ms)"]) ax[1].set_xlabel("Lower is better") - + fig.suptitle("Performance Hints") fig.legend(labels, fontsize=fontsize) fig.tight_layout() - + plt.show() diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png index 3b474b8efbae73..a2762f74684237 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b81dc92d1b9ea2b743b2dd768141d5dca2b822764f0482673cd98d4c60775e3e -size 26796 +oid sha256:5be76e9bec28f8dd1d0e3c4c6240fcfed76820fc6558de89a418f3204a199182 +size 27038 diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png index e17540e8246d82..27990eb8699344 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:794493b6bb153b468b8d933cf8a205adb934792ffa3a67246c8de6016d9c1b8a -size 40060 +oid sha256:84431402a1505759e24f4a8babdba61d5f7648e6d8b9dd4354d15661aa945c3e +size 39968 diff --git a/docs/notebooks/bark-text-to-audio-with-output.rst b/docs/notebooks/bark-text-to-audio-with-output.rst index 1c2c30cc4f5b80..d05f4d5e5bfedc 100644 --- a/docs/notebooks/bark-text-to-audio-with-output.rst +++ b/docs/notebooks/bark-text-to-audio-with-output.rst @@ -54,8 +54,8 @@ tokens into audio codec tokens to generate the full waveform. To enable the community to use Bark via public code, EnCodec codec from Facebook is used to act as an audio representation. -Table of contents: -^^^^^^^^^^^^^^^^^^ +**Table of contents:** + - `Prerequisites <#prerequisites>`__ - `Download and Convert models <#download-and-convert-models>`__ @@ -86,11 +86,11 @@ Prerequisites .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - + [notice] A new release of pip is available: 23.3.2 -> 24.0 [notice] To update, run: pip install --upgrade pip Note: you may need to restart the kernel to use updated packages. - + [notice] A new release of pip is available: 23.3.2 -> 24.0 [notice] To update, run: pip install --upgrade pip Note: you may need to restart the kernel to use updated packages. @@ -105,7 +105,7 @@ Download and Convert models from pathlib import Path from bark.generation import load_model, codec_decode, _flatten_codebooks - + models_dir = Path("models") models_dir.mkdir(exist_ok=True) @@ -124,9 +124,9 @@ models for that. .. 
code:: ipython3 text_use_small = True - + text_encoder = load_model(model_type="text", use_gpu=False, use_small=text_use_small, force_reload=False) - + text_encoder_model = text_encoder["model"] tokenizer = text_encoder["tokenizer"] @@ -134,7 +134,7 @@ models for that. import torch import openvino as ov - + text_model_suffix = "_small" if text_use_small else "" text_model_dir = models_dir / f"text_encoder{text_model_suffix}" text_model_dir.mkdir(exist_ok=True) @@ -147,11 +147,11 @@ models for that. def __init__(self, encoder): super().__init__() self.encoder = encoder - + def forward(self, idx, past_kv=None): return self.encoder(idx, merge_context=True, past_kv=past_kv, use_cache=True) - - + + if not text_encoder_path0.exists() or not text_encoder_path1.exists(): text_encoder_exportable = TextEncoderModel(text_encoder_model) ov_model = ov.convert_model(text_encoder_exportable, example_input=torch.ones((1, 513), dtype=torch.int64)) @@ -200,14 +200,14 @@ provide empty tensor on the first step. .. code:: ipython3 coarse_use_small = True - + coarse_model = load_model( model_type="coarse", use_gpu=False, use_small=coarse_use_small, force_reload=False, ) - + coarse_model_suffix = "_small" if coarse_use_small else "" coarse_model_dir = models_dir / f"coarse{coarse_model_suffix}" coarse_model_dir.mkdir(exist_ok=True) @@ -216,41 +216,41 @@ provide empty tensor on the first step. .. code:: ipython3 import types - - + + class CoarseEncoderModel(torch.nn.Module): def __init__(self, encoder): super().__init__() self.encoder = encoder - + def forward(self, idx, past_kv=None): return self.encoder(idx, past_kv=past_kv, use_cache=True) - - + + def casual_self_attention_forward(self, x, past_kv=None, use_cache=False): B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd) - + # calculate query, key, values for all heads in batch and move head forward to be the batch dim q, k, v = self.c_attn(x).split(self.n_embd, dim=2) k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs) q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs) v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs) past_len = 0 - + if past_kv is not None: past_key = past_kv[0] past_value = past_kv[1] k = torch.cat((past_key, k), dim=-2) v = torch.cat((past_value, v), dim=-2) past_len = past_key.shape[-2] - + FULL_T = k.shape[-2] - + if use_cache is True: present = (k, v) else: present = None - + # causal self-attention; Self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T) if self.flash: # efficient attention using Flash Attention @@ -270,7 +270,7 @@ provide empty tensor on the first step. ), dim=-1, ) - + y = torch.nn.functional.scaled_dot_product_attention(q, k, v, dropout_p=self.dropout, attn_mask=full_attention_mask) else: # manual implementation of attention @@ -280,12 +280,12 @@ provide empty tensor on the first step. att = self.attn_dropout(att) y = att @ v # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs) y = y.transpose(1, 2).contiguous().view(B, T, C) # re-assemble all head outputs side by side - + # output projection y = self.resid_dropout(self.c_proj(y)) return (y, present) - - + + if not coarse_encoder_path.exists(): coarse_encoder_exportable = CoarseEncoderModel(coarse_model) for block in coarse_encoder_exportable.encoder.transformer.h: @@ -310,9 +310,9 @@ provide empty tensor on the first step. .. 
code:: ipython3 fine_use_small = False - + fine_model = load_model(model_type="fine", use_gpu=False, use_small=fine_use_small, force_reload=False) - + fine_model_suffix = "_small" if fine_use_small else "" fine_model_dir = models_dir / f"fine_model{fine_model_suffix}" fine_model_dir.mkdir(exist_ok=True) @@ -323,11 +323,11 @@ provide empty tensor on the first step. def __init__(self, model): super().__init__() self.model = model - + def forward(self, pred_idx, idx): b, t, codes = idx.size() pos = torch.arange(0, t, dtype=torch.long).unsqueeze(0) # shape (1, t) - + # forward the GPT model itself tok_embs = [wte(idx[:, :, i]).unsqueeze(-1) for i, wte in enumerate(self.model.transformer.wtes)] # token embeddings of shape (b, t, n_embd) tok_emb = torch.cat(tok_embs, dim=-1) @@ -338,8 +338,8 @@ provide empty tensor on the first step. x = block(x) x = self.model.transformer.ln_f(x) return x - - + + fine_feature_extractor_path = fine_model_dir / "bark_fine_feature_extractor.xml" Fine encoder @@ -384,7 +384,7 @@ For better usability, classes for working with models provided below. def __init__(self, core, device, model_path1, model_path2): self.compiled_model1 = core.compile_model(model_path1, device) self.compiled_model2 = core.compile_model(model_path2, device) - + def __call__(self, input_ids, past_kv=None): if past_kv is None: outputs = self.compiled_model1(input_ids, share_outputs=True) @@ -392,7 +392,7 @@ For better usability, classes for working with models provided below. outputs = self.compiled_model2([input_ids, *past_kv], share_outputs=True) logits, kv_cache = self.postprocess_outputs(outputs, past_kv is None) return logits, kv_cache - + def postprocess_outputs(self, outs, is_first_stage): net_outs = self.compiled_model1.outputs if is_first_stage else self.compiled_model2.outputs logits = outs[net_outs[0]] @@ -400,18 +400,18 @@ For better usability, classes for working with models provided below. for out_tensor in net_outs[1:]: kv_cache.append(outs[out_tensor]) return logits, kv_cache - - + + class OVBarkEncoder: def __init__(self, core, device, model_path): self.compiled_model = core.compile_model(model_path, device) - + def __call__(self, idx, past_kv=None): if past_kv is None: past_kv = self._init_past_kv() outs = self.compiled_model([idx, *past_kv], share_outputs=True) return self.postprocess_outputs(outs) - + def postprocess_outputs(self, outs): net_outs = self.compiled_model.outputs logits = outs[net_outs[0]] @@ -419,7 +419,7 @@ For better usability, classes for working with models provided below. for out_tensor in net_outs[1:]: kv_cache.append(outs[out_tensor]) return logits, kv_cache - + def _init_past_kv(self): inputs = [] for input_t in self.compiled_model.inputs[1:]: @@ -428,8 +428,8 @@ For better usability, classes for working with models provided below. input_shape[2] = 0 inputs.append(ov.Tensor(ov.Type.f32, input_shape.get_shape())) return inputs - - + + class OVBarkFineEncoder: def __init__(self, core, device, model_dir, num_lm_heads=7): self.feats_compiled_model = core.compile_model(model_dir / "bark_fine_feature_extractor.xml", device) @@ -438,7 +438,7 @@ For better usability, classes for working with models provided below. 
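    # One compiled model per fine codebook head: with the default
    # num_lm_heads=7 this loads bark_fine_lm_0.xml through bark_fine_lm_6.xml
    # from model_dir, alongside the shared feature extractor compiled above.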
for i in range(num_lm_heads): lm_heads.append(core.compile_model(model_dir / f"bark_fine_lm_{i}.xml", device)) self.lm_heads = lm_heads - + def __call__(self, pred_idx, idx): feats = self.feats_compiled_model([ov.Tensor(pred_idx), ov.Tensor(idx)])[self.feats_out] lm_id = pred_idx - 1 @@ -467,8 +467,8 @@ consists from several steps, illustrated on the diagram below: from typing import Optional, Union, Dict import tqdm import numpy as np - - + + def generate_audio( text: str, history_prompt: Optional[Union[Dict, str]] = None, @@ -477,14 +477,14 @@ consists from several steps, illustrated on the diagram below: silent: bool = False, ): """Generate audio array from input text. - + Args: text: text to be turned into audio history_prompt: history choice for audio cloning text_temp: generation temperature (1.0 more diverse, 0.0 more conservative) waveform_temp: generation temperature (1.0 more diverse, 0.0 more conservative) silent: disable progress bar - + Returns: numpy audio array at sample frequency 24khz """ @@ -511,13 +511,13 @@ consists from several steps, illustrated on the diagram below: silent: bool = False, ): """Generate semantic array from text. - + Args: text: text to be turned into audio history_prompt: history choice for audio cloning temp: generation temperature (1.0 more diverse, 0.0 more conservative) silent: disable progress bar - + Returns: numpy semantic array to be fed into `semantic_to_waveform` """ @@ -550,8 +550,8 @@ consists from several steps, illustrated on the diagram below: ) import torch.nn.functional as F from typing import List, Optional, Union, Dict - - + + def generate_text_semantic( text: str, history_prompt: List[str] = None, @@ -577,7 +577,7 @@ consists from several steps, illustrated on the diagram below: allow_early_stop: allow to stop generation if maximum duration is not reached Returns: numpy semantic array to be fed into `semantic_to_waveform` - + """ text = _normalize_whitespace(text) if history_prompt is not None: @@ -670,13 +670,13 @@ consists from several steps, illustrated on the diagram below: silent: bool = False, ): """Generate audio array from semantic input. 
- + Args: semantic_tokens: semantic token output from `text_to_semantic` history_prompt: history choice for audio cloning temp: generation temperature (1.0 more diverse, 0.0 more conservative) silent: disable progress bar - + Returns: numpy audio array at sample frequency 24khz """ @@ -719,7 +719,7 @@ consists from several steps, illustrated on the diagram below: sliding_window_len: size of sliding window for generation cycle Returns: numpy audio array with coarse audio codes - + """ semantic_to_coarse_ratio = COARSE_RATE_HZ / SEMANTIC_RATE_HZ * N_COARSE_CODEBOOKS max_semantic_history = int(np.floor(max_coarse_history / semantic_to_coarse_ratio)) @@ -775,12 +775,12 @@ consists from several steps, illustrated on the diagram below: if n_step >= n_steps: continue is_major_step = n_step % N_COARSE_CODEBOOKS == 0 - + if kv_cache is not None: x_input = x_in[:, [-1]] else: x_input = x_in - + logits, kv_cache = ov_coarse_model(x_input, past_kv=kv_cache) logit_start_idx = SEMANTIC_VOCAB_SIZE + (1 - int(is_major_step)) * CODEBOOK_SIZE logit_end_idx = SEMANTIC_VOCAB_SIZE + (2 - int(is_major_step)) * CODEBOOK_SIZE @@ -814,8 +814,8 @@ consists from several steps, illustrated on the diagram below: for n in range(1, N_COARSE_CODEBOOKS): gen_coarse_audio_arr[n, :] -= n * CODEBOOK_SIZE return gen_coarse_audio_arr - - + + def generate_fine( x_coarse_gen: np.ndarray, history_prompt: Optional[Union[Dict, str]] = None, @@ -831,7 +831,7 @@ consists from several steps, illustrated on the diagram below: silent: disable progress bar Returns: numpy audio array with coarse audio codes - + """ if history_prompt is not None: history_prompt = _load_history_prompt(history_prompt) @@ -912,16 +912,16 @@ select device from dropdown list for running inference using OpenVINO import ipywidgets as widgets import openvino as ov - + core = ov.Core() - + device = widgets.Dropdown( options=core.available_devices + ["AUTO"], value="AUTO", description="Device:", disabled=False, ) - + device @@ -936,7 +936,7 @@ select device from dropdown list for running inference using OpenVINO .. code:: ipython3 core = ov.Core() - + ov_text_model = OVBarkTextEncoder(core, device.value, text_encoder_path0, text_encoder_path1) ov_coarse_model = OVBarkEncoder(core, device.value, coarse_encoder_path) ov_fine_model = OVBarkFineEncoder(core, device.value, fine_model_dir) @@ -945,14 +945,14 @@ select device from dropdown list for running inference using OpenVINO import time from bark import SAMPLE_RATE - + torch.manual_seed(42) t0 = time.time() text = "Hello, my name is Suno. And, uh — and I like banana and apples. [laughs] But I also have other interests such as playing tic tac toe." audio_array = generate_audio(text) generation_duration_s = time.time() - t0 audio_duration_s = audio_array.shape[0] / SAMPLE_RATE - + print(f"took {generation_duration_s:.0f}s to generate {audio_duration_s:.0f}s of audio") @@ -973,7 +973,7 @@ select device from dropdown list for running inference using OpenVINO from IPython.display import Audio from bark import SAMPLE_RATE - + Audio(audio_array, rate=SAMPLE_RATE) @@ -981,7 +981,7 @@ select device from dropdown list for running inference using OpenVINO .. raw:: html - +
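The waveform returned by ``generate_audio`` can also be written to disk for
later playback. A minimal sketch, assuming the ``audio_array`` and
``SAMPLE_RATE`` values from the cells above and that ``scipy`` is installed in
the environment:

.. code:: ipython3

    from scipy.io import wavfile

    # Bark returns a mono float waveform at 24 kHz (SAMPLE_RATE);
    # scipy writes float arrays out as a floating-point WAV file.
    wavfile.write("bark_output.wav", SAMPLE_RATE, audio_array)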