diff --git a/.bazelrc b/.bazelrc index f9e0b4ab07..ddaae0155b 100644 --- a/.bazelrc +++ b/.bazelrc @@ -25,6 +25,9 @@ build --cxxopt="-fdiagnostics-color=always" build --cxxopt='-std=c++17' #build --linkopt="-Wl,--no-as-needed" +build:windows --cxxopt="/GS-" --cxxopt="/std:c++17" --cxxopt="/permissive-" +build:windows --cxxopt="/wd4244" --cxxopt="/wd4267" --cxxopt="/wd4819" +build:windows --features=windows_export_all_symbols build:python --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" build:python --linkopt="-D_GLIBCXX_USE_CXX11_ABI=0" diff --git a/.github/scripts/install-torch-tensorrt-windows.sh b/.github/scripts/install-torch-tensorrt-windows.sh new file mode 100644 index 0000000000..81adc4e1d5 --- /dev/null +++ b/.github/scripts/install-torch-tensorrt-windows.sh @@ -0,0 +1,14 @@ +set -eou pipefail +source "${BUILD_ENV_FILE}" + +# Install test index version of Torch and Torchvision +${CONDA_RUN} ${PIP_INSTALL_TORCH} torchvision +${CONDA_RUN} pip install pyyaml mpmath==1.3.0 + +# Install TRT 10 from PyPi +${CONDA_RUN} pip install tensorrt==10.0.0b6 tensorrt-${CU_VERSION::4}-bindings==10.0.0b6 tensorrt-${CU_VERSION::4}-libs==10.0.0b6 --extra-index-url https://pypi.nvidia.com + +# Install pre-built Torch-TRT +${CONDA_RUN} pip install ${RUNNER_ARTIFACT_DIR}/torch_tensorrt*.whl + +echo -e "Running test script"; diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml new file mode 100644 index 0000000000..c7f1ba1d6b --- /dev/null +++ b/.github/workflows/build-test-windows.yml @@ -0,0 +1,159 @@ +name: Build and test Windows wheels + +on: + pull_request: + push: + branches: + - main + - nightly + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: 
wheel + os: windows + test-infra-repository: pytorch/test-infra + test-infra-ref: main + with-rocm: false + with-cpu: false + + build: + needs: generate-matrix + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + pre-script: packaging/pre_build_script_windows.sh + smoke-test-script: packaging/smoke_test_windows.py + package-name: torch_tensorrt + name: Build torch-tensorrt whl package + uses: pytorch/test-infra/.github/workflows/build_wheels_windows.yml@main + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + smoke-test-script: ${{ matrix.smoke-test-script }} + package-name: ${{ matrix.package-name }} + trigger-event: ${{ github.event_name }} + + tests-py-dynamo-converters: + name: Test dynamo converters [Python] + needs: [generate-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-converters + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + pushd . 
+ cd tests/py/dynamo + ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 10 conversion/ + popd + + tests-py-dynamo-fe: + name: Test dynamo frontend [Python] + needs: [generate-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-fe + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + pushd . + cd tests/py/dynamo + ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_fe_test_results.xml --ir dynamo models/test_models_export.py + ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/test_dyn_models.py + popd + + tests-py-torch-compile-be: + name: Test torch compile backend [Python] + needs: [generate-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-torch-compile-be + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + pre-script: packaging/driver_upgrade.bat + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + script: | + export USE_HOST_DEPS=1 + pushd . 
+ cd tests/py/dynamo + ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pytest -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_e2e_test_results.xml --ir torch_compile models/test_models.py + popd + + tests-py-dynamo-core: + name: Test dynamo core [Python] + needs: [generate-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-core + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + pushd . + cd tests/py/dynamo + ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/ + ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + popd diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml new file mode 100644 index 0000000000..b00b4c3dd1 --- /dev/null +++ b/.github/workflows/windows-test.yml @@ -0,0 +1,140 @@ +name: Test on Windows + +on: + workflow_call: + inputs: + repository: + description: 'Repository to checkout, defaults to ""' + default: "" + type: string + ref: + description: 'Reference to checkout, defaults to "nightly"' + 
default: "nightly" + type: string + test-infra-repository: + description: "Test infra repository to use" + default: "pytorch/test-infra" + type: string + test-infra-ref: + description: "Test infra reference to use" + default: "" + type: string + build-matrix: + description: "Build matrix to utilize" + default: "" + type: string + pre-script: + description: "Pre script to run prior to build" + default: "" + type: string + script: + description: 'Script to utilize' + default: "python setup.py bdist_wheel" + type: string + job-name: + description: "Name for the job, which is displayed in the GitHub UI" + default: "windows-job" + type: string + continue-on-error: + description: "Prevents a job from failing when a step fails. Set to true to allow a job to pass when exec script step fails." + default: false + type: boolean + +jobs: + test: + strategy: + fail-fast: false + matrix: ${{ fromJSON(inputs.build-matrix) }} + env: + PYTHON_VERSION: ${{ matrix.python_version }} + PACKAGE_TYPE: wheel + REPOSITORY: ${{ inputs.repository }} + REF: ${{ inputs.ref }} + CU_VERSION: ${{ matrix.desired_cuda }} + SCRIPT: ${{ inputs.script }} + PYTHONUTF8: 1 + name: ${{ inputs.job-name }}-${{ matrix.desired_cuda }} + runs-on: windows.8xlarge.nvidia.gpu.nonephemeral + defaults: + run: + shell: bash -l {0} + # If a build is taking longer than 60 minutes on these runners we need + # to have a conversation + timeout-minutes: 60 + steps: + - name: Clean workspace + run: | + echo "::group::Cleanup debug output" + rm -rfv "${GITHUB_WORKSPACE}" + mkdir -p "${GITHUB_WORKSPACE}" + echo "::endgroup::" + - name: Checkout repository (${{ inputs.test-infra-repository }}@${{ inputs.test-infra-ref }}) + uses: actions/checkout@v3 + with: + # Support the use case where we need to checkout someone's fork + repository: ${{ inputs.test-infra-repository }} + ref: ${{ inputs.test-infra-ref }} + path: test-infra + - name: Setup Windows + uses: ./test-infra/.github/actions/setup-windows + - name: Setup SSH + uses: 
./test-infra/.github/actions/setup-ssh + with: + github-secret: ${{ github.token }} + - name: Add Conda scripts to GitHub path + run: | + echo "C:/Jenkins/Miniconda3/Scripts" >> $GITHUB_PATH + - uses: ./test-infra/.github/actions/set-channel + - uses: ./test-infra/.github/actions/setup-binary-builds + with: + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref }} + setup-miniconda: false + python-version: ${{ env.PYTHON_VERSION }} + cuda-version: ${{ env.CU_VERSION }} + arch: ${{ env.ARCH }} + - name: Run Pre-Script with Caching + if: ${{ inputs.pre-script != '' }} + uses: ./test-infra/.github/actions/run-script-with-cache + with: + cache-path: ${{ inputs.cache-path }} + cache-key: ${{ inputs.cache-key }} + repository: ${{ inputs.repository }} + script: ${{ inputs.pre-script }} + is_windows: 'enabled' + - name: Download artifacts + uses: actions/download-artifact@v3 + with: + name: ${{ env.ARTIFACT_NAME }} + path: ${{ runner.temp }}/artifacts/ + - name: Pack script + continue-on-error: ${{ inputs.continue-on-error }} + working-directory: ${{ inputs.repository }} + env: + ALL_SECRETS: ${{ toJSON(secrets) }} + run: | + set -eou pipefail + source "${BUILD_ENV_FILE}" + { + echo "${SCRIPT}"; + } > "user_script" + cat .github/scripts/install-torch-tensorrt-windows.sh user_script > exec_script + - name: Run script + uses: ./test-infra/.github/actions/run-script-with-cache + with: + repository: ${{ inputs.repository }} + script: exec_script + is_windows: 'enabled' + - name: Surface failing tests + if: always() + uses: pmeier/pytest-results-action@v0.3.0 + with: + path: ${{ env.RUNNER_TEST_RESULTS_DIR }} + fail-on-empty: false + - name: Teardown Windows + if: ${{ always() }} + uses: ./test-infra/.github/actions/teardown-windows + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + cancel-in-progress: true diff --git a/BUILD 
b/BUILD.bazel similarity index 95% rename from BUILD rename to BUILD.bazel index 3138a5d021..950839a40e 100644 --- a/BUILD +++ b/BUILD.bazel @@ -52,7 +52,7 @@ pkg_tar( pkg_tar( name = "lib", srcs = select({ - ":windows": ["//cpp/lib:torch_tensorrt.dll"], + ":windows": ["//cpp/lib:torchtrt.dll"], "//conditions:default": [ "//cpp/lib:libtorchtrt.so", "//cpp/lib:libtorchtrt_plugins.so", @@ -66,7 +66,7 @@ pkg_tar( pkg_tar( name = "lib_rt", srcs = select({ - ":windows": ["//cpp/lib:torch_tensorrt_runtime.dll"], + ":windows": ["//cpp/lib:torchtrt_runtime.dll"], "//conditions:default": [ "//cpp/lib:libtorchtrt_runtime.so", ], diff --git a/core/conversion/BUILD b/core/conversion/BUILD index 318aebdfbf..aa40c58ed7 100644 --- a/core/conversion/BUILD +++ b/core/conversion/BUILD @@ -31,6 +31,7 @@ cc_library( ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], "//conditions:default": ["@libtorch"], }), + alwayslink = True, ) pkg_tar( diff --git a/core/conversion/conversionctx/BUILD b/core/conversion/conversionctx/BUILD index 3c2dea1a3e..b52128a7f0 100644 --- a/core/conversion/conversionctx/BUILD +++ b/core/conversion/conversionctx/BUILD @@ -26,6 +26,7 @@ cc_library( ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], "//conditions:default": ["@libtorch"], }), + alwayslink = True, ) pkg_tar( diff --git a/core/conversion/converters/impl/activation.cpp b/core/conversion/converters/impl/activation.cpp index 95be9b670d..eb106c8ad3 100644 --- a/core/conversion/converters/impl/activation.cpp +++ b/core/conversion/converters/impl/activation.cpp @@ -91,7 +91,7 @@ auto acthardtanh TORCHTRT_UNUSED = // Out_tensor of ParametricReLU shape is all 0, when slopes nDims is not equal to in nDims. // Since make sure splopes nDims is equal to in nDims. 
- if (slopes.ndimension() == 1 and original_shape.nbDims != slopes.ndimension()) { + if (slopes.ndimension() == 1 && original_shape.nbDims != slopes.ndimension()) { std::vector slopes_new_shape(original_shape.nbDims, 1); auto first_inputs_allowed_formats = ctx->net->getInput(0)->getAllowedFormats(); for (size_t inputs_index = 1; inputs_index < ctx->num_inputs; inputs_index++) { diff --git a/core/ir/BUILD b/core/ir/BUILD index 1d4d4832b8..11b0c09ba4 100644 --- a/core/ir/BUILD +++ b/core/ir/BUILD @@ -28,6 +28,7 @@ cc_library( ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], "//conditions:default": ["@libtorch"], }), + alwayslink = True, ) pkg_tar( diff --git a/core/lowering/passes/BUILD b/core/lowering/passes/BUILD index c64385a2d9..ae1b06bfcf 100644 --- a/core/lowering/passes/BUILD +++ b/core/lowering/passes/BUILD @@ -52,6 +52,7 @@ cc_library( ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], "//conditions:default": ["@libtorch"], }), + alwayslink = True, ) pkg_tar( diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp index 4a33907bec..023f54c113 100644 --- a/core/runtime/TRTEngine.cpp +++ b/core/runtime/TRTEngine.cpp @@ -211,7 +211,7 @@ void TRTEngine::dump_engine_layer_info_to_file(const std::string& path) { void TRTEngine::dump_engine_layer_info() { std::string layer_info_file = - std::experimental::filesystem::path{profile_path_prefix + "/" + name + "_layer_information.json"}.string(); + std::filesystem::path{profile_path_prefix + "/" + name + "_layer_information.json"}.string(); dump_engine_layer_info_to_file(layer_info_file); return; } @@ -229,16 +229,12 @@ std::string TRTEngine::get_engine_layer_info() { void TRTEngine::set_profiling_paths() { device_profile_path = - std::experimental::filesystem::path{profile_path_prefix + "/" + name + "_device_config_profile.trace"}.string(); - input_profile_path = - std::experimental::filesystem::path{profile_path_prefix + "/" + name + "_input_profile.trace"}.string(); - 
output_profile_path = - std::experimental::filesystem::path{profile_path_prefix + "/" + name + "_output_profile.trace"}.string(); - enqueue_profile_path = - std::experimental::filesystem::path{profile_path_prefix + "/" + name + "_enqueue_profile.trace"}.string(); + std::filesystem::path{profile_path_prefix + "/" + name + "_device_config_profile.trace"}.string(); + input_profile_path = std::filesystem::path{profile_path_prefix + "/" + name + "_input_profile.trace"}.string(); + output_profile_path = std::filesystem::path{profile_path_prefix + "/" + name + "_output_profile.trace"}.string(); + enqueue_profile_path = std::filesystem::path{profile_path_prefix + "/" + name + "_enqueue_profile.trace"}.string(); trt_engine_profile_path = - std::experimental::filesystem::path{profile_path_prefix + "/" + name + "_engine_exectuion_profile.trace"} - .string(); + std::filesystem::path{profile_path_prefix + "/" + name + "_engine_exectuion_profile.trace"}.string(); } std::string TRTEngine::to_str() const { diff --git a/core/runtime/TRTEngine.h b/core/runtime/TRTEngine.h index 3d52aa2689..7960d04b46 100644 --- a/core/runtime/TRTEngine.h +++ b/core/runtime/TRTEngine.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include #include @@ -26,7 +26,7 @@ struct TRTEngine : torch::CustomClassHolder { std::string name; RTDevice device_info; - std::string profile_path_prefix = std::experimental::filesystem::temp_directory_path().string(); + std::string profile_path_prefix = std::filesystem::temp_directory_path().string(); std::unordered_map in_binding_map = {}; // TRT IDX -> PYT IDX std::unordered_map out_binding_map = {}; // TRT IDX -> PYT IDX diff --git a/core/util/BUILD b/core/util/BUILD index e2f6684830..be44122d8d 100644 --- a/core/util/BUILD +++ b/core/util/BUILD @@ -57,6 +57,7 @@ cc_library( hdrs = [ "Exception.h", ], + alwayslink = True, ) cc_library( @@ -88,6 +89,7 @@ cc_library( ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], "//conditions:default": 
["@libtorch"], }), + alwayslink = True, ) pkg_tar( diff --git a/core/util/logging/BUILD b/core/util/logging/BUILD index 9aa91abee9..802d1cc18d 100644 --- a/core/util/logging/BUILD +++ b/core/util/logging/BUILD @@ -24,6 +24,7 @@ cc_library( ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], "//conditions:default": ["@libtorch"], }), + alwayslink = True, ) pkg_tar( diff --git a/cpp/lib/BUILD b/cpp/lib/BUILD index 0520a90a20..b48d1200ad 100644 --- a/cpp/lib/BUILD +++ b/cpp/lib/BUILD @@ -34,7 +34,7 @@ cc_binary( ) cc_binary( - name = "torch_tensorrt.dll", + name = "torchtrt.dll", srcs = [], linkshared = True, linkstatic = True, @@ -42,3 +42,14 @@ cc_binary( "//cpp:torch_tensorrt", ], ) + +cc_binary( + name = "torchtrt_runtime.dll", + srcs = [], + linkshared = True, + linkstatic = True, + deps = [ + "//core/plugins:torch_tensorrt_plugins", + "//core/runtime", + ], +) diff --git a/packaging/driver_upgrade.bat b/packaging/driver_upgrade.bat new file mode 100644 index 0000000000..886e908f55 --- /dev/null +++ b/packaging/driver_upgrade.bat @@ -0,0 +1,10 @@ +REM Source: https://github.com/pytorch/builder/blob/4e109742d88ff3c85e77d60bc4d90d229d7f6afe/windows/internal/driver_update.bat + +set "DRIVER_DOWNLOAD_LINK=https://ossci-windows.s3.amazonaws.com/528.89-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe" +curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output 528.89-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe +if errorlevel 1 exit /b 1 + +start /wait 528.89-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe -s -noreboot +if errorlevel 1 exit /b 1 + +del 528.89-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe || ver > NUL diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh new file mode 100644 index 0000000000..2004966613 --- /dev/null +++ b/packaging/pre_build_script_windows.sh @@ -0,0 +1,8 @@ +python -m pip 
install -U numpy packaging pyyaml setuptools wheel + +# Install TRT 10 from PyPi +python -m pip install tensorrt==10.0.0b6 tensorrt-${CU_VERSION::4}-bindings==10.0.0b6 tensorrt-${CU_VERSION::4}-libs==10.0.0b6 --extra-index-url https://pypi.nvidia.com + +choco install bazelisk -y + +cat toolchains/ci_workspaces/WORKSPACE.win.release.tmpl | envsubst > WORKSPACE diff --git a/packaging/smoke_test_windows.py b/packaging/smoke_test_windows.py new file mode 100644 index 0000000000..c7880cd862 --- /dev/null +++ b/packaging/smoke_test_windows.py @@ -0,0 +1,14 @@ +import subprocess + +import tensorrt # noqa: F401 +import torch + +print(f"Torch CUDA version: {torch.version.cuda}") + +result = subprocess.run( + ["systeminfo"], + capture_output=True, + text=True, +) +print(result.stdout) +print(result.stderr) diff --git a/py/BUILD b/py/BUILD.bazel similarity index 100% rename from py/BUILD rename to py/BUILD.bazel diff --git a/py/requirements.txt b/py/requirements.txt index d402fd501e..291f7b7457 100644 --- a/py/requirements.txt +++ b/py/requirements.txt @@ -6,4 +6,4 @@ torch==2.3.0 torchvision==0.18.0 --extra-index-url https://pypi.ngc.nvidia.com pyyaml -tensorrt \ No newline at end of file +tensorrt diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py index b2bc0660e6..1b9faf08a8 100644 --- a/py/torch_tensorrt/__init__.py +++ b/py/torch_tensorrt/__init__.py @@ -90,13 +90,21 @@ def _find_lib(name: str, paths: List[str]) -> str: def _register_with_torch() -> None: trtorch_dir = os.path.dirname(__file__) - if os.path.isfile(trtorch_dir + "/lib/libtorchtrt.so"): + linked_file = ( + "/torchtrt.dll" if sys.platform.startswith("win") else "/lib/libtorchtrt.so" + ) + linked_file_runtime = ( + "/torchtrt_runtime.dll" + if sys.platform.startswith("win") + else "/lib/libtorchtrt_runtime.so" + ) + if os.path.isfile(trtorch_dir + linked_file): assert ENABLED_FEATURES.torchscript_frontend assert ENABLED_FEATURES.torch_tensorrt_runtime - 
torch.ops.load_library(trtorch_dir + "/lib/libtorchtrt.so") - elif os.path.isfile(trtorch_dir + "/lib/libtorchtrt_runtime.so"): + torch.ops.load_library(trtorch_dir + linked_file) + elif os.path.isfile(trtorch_dir + linked_file_runtime): assert ENABLED_FEATURES.torch_tensorrt_runtime - torch.ops.load_library(trtorch_dir + "/lib/libtorchtrt_runtime.so") + torch.ops.load_library(trtorch_dir + linked_file_runtime) _register_with_torch() diff --git a/setup.py b/setup.py index 494eaa7ee1..ba29433e5f 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ from datetime import datetime from distutils.cmd import Command from pathlib import Path -from shutil import copyfile, rmtree +from shutil import copyfile, rmtree, which from typing import List import setuptools @@ -21,7 +21,7 @@ from setuptools.command.develop import develop from setuptools.command.editable_wheel import editable_wheel from setuptools.command.install import install -from torch.utils import cpp_extension +from torch.utils.cpp_extension import IS_WINDOWS, BuildExtension, CUDAExtension from wheel.bdist_wheel import bdist_wheel __version__: str = "0.0.0" @@ -78,11 +78,12 @@ def load_dep_info(): load_dep_info() -dir_path = str(get_root_dir()) + "/py" +dir_path = os.path.join(str(get_root_dir()), "py") CXX11_ABI = False JETPACK_VERSION = None -PY_ONLY = False +# TODO: Remove once C++ Runtime is integrated in Windows +PY_ONLY = IS_WINDOWS NO_TS = False LEGACY = False RELEASE = False @@ -167,25 +168,6 @@ def load_dep_info(): ) -def which(program): - import os - - def is_exe(fpath): - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - - fpath, fname = os.path.split(program) - if fpath: - if is_exe(program): - return program - else: - for path in os.environ["PATH"].split(os.pathsep): - exe_file = os.path.join(path, program) - if is_exe(exe_file): - return exe_file - - return None - - BAZEL_EXE = None if not PY_ONLY: BAZEL_EXE = which("bazelisk") @@ -216,6 +198,9 @@ def build_libtorchtrt_pre_cxx11_abi( 
else: print("using CXX11 ABI build") + if IS_WINDOWS: + cmd.append("--config=windows") + if JETPACK_VERSION == "4.5": cmd.append("--platforms=//toolchains:jetpack_4.5") print("Jetpack version: 4.5") @@ -238,7 +223,7 @@ def build_libtorchtrt_pre_cxx11_abi( def gen_version_file(): - if not os.path.exists(dir_path + "/torch_tensorrt/_version.py"): + if not (IS_WINDOWS or os.path.exists(dir_path + "/torch_tensorrt/_version.py")): os.mknod(dir_path + "/torch_tensorrt/_version.py") with open(dir_path + "/torch_tensorrt/_version.py", "w") as f: @@ -254,7 +239,16 @@ def copy_libtorchtrt(multilinux=False, rt_only=False): os.makedirs(dir_path + "/torch_tensorrt/lib") print("copying library into module") - if multilinux: + if IS_WINDOWS: + copyfile( + dir_path + "/../bazel-bin/cpp/lib/torchtrt.dll", + dir_path + "/torch_tensorrt/torchtrt.dll", + ) + copyfile( + dir_path + "/../bazel-bin/cpp/lib/torchtrt.dll.if.lib", + dir_path + "/torch_tensorrt/lib/torchtrt.lib", + ) + elif multilinux: copyfile( dir_path + "/build/libtrtorch_build/libtrtorch.so", dir_path + "/trtorch/lib/libtrtorch.so", @@ -377,22 +371,23 @@ class CleanCommand(Command): """Custom clean command to tidy up the project root.""" PY_CLEAN_DIRS = [ - "./build", - "./dist", - "./torch_tensorrt/__pycache__", - "./torch_tensorrt/lib", - "./torch_tensorrt/include", - "./torch_tensorrt/bin", - "./*.pyc", - "./*.tgz", - "./*.egg-info", + os.path.join(".", "build"), + os.path.join(".", "dist"), + os.path.join(".", "torch_tensorrt", "__pycache__"), + os.path.join(".", "torch_tensorrt", "lib"), + os.path.join(".", "torch_tensorrt", "include"), + os.path.join(".", "torch_tensorrt", "bin"), + os.path.join(".", "*.pyc"), + os.path.join(".", "*.tgz"), + os.path.join(".", "*.egg-info"), ] PY_CLEAN_FILES = [ - "./torch_tensorrt/*.so", - "./torch_tensorrt/_version.py", - "./torch_tensorrt/BUILD", - "./torch_tensorrt/WORKSPACE", - "./torch_tensorrt/LICENSE", + os.path.join(".", "torch_tensorrt", "*.so"), + os.path.join(".", 
"torch_tensorrt", "*.dll"), + os.path.join(".", "torch_tensorrt", "_version.py"), + os.path.join(".", "torch_tensorrt", "BUILD"), + os.path.join(".", "torch_tensorrt", "WORKSPACE"), + os.path.join(".", "torch_tensorrt", "LICENSE"), ] description = "Command to tidy up the project root" user_options = [] @@ -487,7 +482,7 @@ def run(self): if not (PY_ONLY or NO_TS): ext_modules += [ - cpp_extension.CUDAExtension( + CUDAExtension( "torch_tensorrt._C", [ "py/" + f @@ -513,33 +508,44 @@ def run(self): dir_path + "/../", "/usr/local/cuda", ], - extra_compile_args=[ - "-Wno-deprecated", - "-Wno-deprecated-declarations", - ] - + ( - ["-D_GLIBCXX_USE_CXX11_ABI=1"] - if CXX11_ABI - else ["-D_GLIBCXX_USE_CXX11_ABI=0"] + extra_compile_args=( + [ + "/GS-", + "/permissive-", + ] + if IS_WINDOWS + else [ + "-Wno-deprecated", + "-Wno-deprecated-declarations", + ] + + ( + ["-D_GLIBCXX_USE_CXX11_ABI=1"] + if CXX11_ABI + else ["-D_GLIBCXX_USE_CXX11_ABI=0"] + ) ), - extra_link_args=[ - "-Wno-deprecated", - "-Wno-deprecated-declarations", - "-Wl,--no-as-needed", - "-ltorchtrt", - "-Wl,-rpath,$ORIGIN/lib", - "-lpthread", - "-ldl", - "-lutil", - "-lrt", - "-lm", - "-Xlinker", - "-export-dynamic", - ] - + ( - ["-D_GLIBCXX_USE_CXX11_ABI=1"] - if CXX11_ABI - else ["-D_GLIBCXX_USE_CXX11_ABI=0"] + extra_link_args=( + [] + if IS_WINDOWS + else [ + "-Wno-deprecated", + "-Wno-deprecated-declarations", + "-Wl,--no-as-needed", + "-ltorchtrt", + "-Wl,-rpath,$ORIGIN/lib", + "-lpthread", + "-ldl", + "-lutil", + "-lrt", + "-lm", + "-Xlinker", + "-export-dynamic", + ] + + ( + ["-D_GLIBCXX_USE_CXX11_ABI=1"] + if CXX11_ABI + else ["-D_GLIBCXX_USE_CXX11_ABI=0"] + ) ), undef_macros=["NDEBUG"], ) @@ -582,6 +588,7 @@ def run(self): "include/torch_tensorrt/core/util/logging/*.h", "bin/*", "lib/*", + "*.dll", ] } ) @@ -610,7 +617,7 @@ def run(self): "install": InstallCommand, "clean": CleanCommand, "develop": DevelopCommand, - "build_ext": cpp_extension.BuildExtension, + "build_ext": BuildExtension, 
"bdist_wheel": BdistCommand, "editable_wheel": EditableWheelCommand, }, diff --git a/tests/py/dynamo/lowering/test_aten_lowering_passes.py b/tests/py/dynamo/lowering/test_aten_lowering_passes.py index 2d7a4731f5..0dd9a8de1c 100644 --- a/tests/py/dynamo/lowering/test_aten_lowering_passes.py +++ b/tests/py/dynamo/lowering/test_aten_lowering_passes.py @@ -1,9 +1,11 @@ +import sys import unittest import torch -import torch_tensorrt from torch.testing._internal.common_utils import TestCase, run_tests +import torch_tensorrt + from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing @@ -273,6 +275,10 @@ def forward(self, q, k, v): torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8, "GPU compute capability is too low to run flash attention, need Ampere (8.0) or greater", ) +@unittest.skipIf( + sys.platform.startswith("win"), + "Test not supported on Windows", +) class TestLowerFlashAttention(TestCase): def test_lower_flash_attention(self): class FlashAttention(torch.nn.Module): diff --git a/tests/py/dynamo/models/test_models_export.py b/tests/py/dynamo/models/test_models_export.py index 38889a3df8..e4edd21643 100644 --- a/tests/py/dynamo/models/test_models_export.py +++ b/tests/py/dynamo/models/test_models_export.py @@ -4,12 +4,13 @@ import pytest import timm import torch -import torch_tensorrt as torchtrt import torchvision.models as models from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity from transformers import BertModel from transformers.utils.fx import symbolic_trace as transformers_trace +import torch_tensorrt as torchtrt + assertions = unittest.TestCase() @@ -108,7 +109,9 @@ def test_efficientnet_b0(ir): @pytest.mark.unit def test_bert_base_uncased(ir): - model = BertModel.from_pretrained("bert-base-uncased").cuda().eval() + model = ( + BertModel.from_pretrained("bert-base-uncased", return_dict=False).cuda().eval() + ) input = torch.randint(0, 1, (1, 14), dtype=torch.int32).to("cuda") input2 = 
torch.randint(0, 1, (1, 14), dtype=torch.int32).to("cuda") @@ -139,8 +142,8 @@ def test_bert_base_uncased(ir): msg=f"Number of outputs for BERT model compilation is different with Pytorch {len(model_outputs)} and TensorRT {len(trt_model_outputs)}. Please check the compilation.", ) - for key, _ in model_outputs.items(): - out, trt_out = model_outputs[key], trt_model_outputs[key] + for index in range(len(model_outputs)): + out, trt_out = model_outputs[index], trt_model_outputs[index] cos_sim = cosine_similarity(out, trt_out) assertions.assertTrue( cos_sim > COSINE_THRESHOLD, diff --git a/tests/py/dynamo/runtime/test_hw_compat.py b/tests/py/dynamo/runtime/test_hw_compat.py index fa87c9947c..6106c96512 100644 --- a/tests/py/dynamo/runtime/test_hw_compat.py +++ b/tests/py/dynamo/runtime/test_hw_compat.py @@ -2,9 +2,10 @@ import unittest import torch -import torch_tensorrt from torch.testing._internal.common_utils import TestCase, run_tests +import torch_tensorrt + class TestHardwareCompatibility(TestCase): @unittest.skipIf( @@ -63,8 +64,9 @@ def forward(self, x): self.assertIn("Hardware Compatibility: Disabled", cpp_repr) @unittest.skipIf( - torch.ops.tensorrt.ABI_VERSION() != "5", - "Detected incorrect ABI version, please update this test case", + not torch_tensorrt.ENABLED_FEATURES.torch_tensorrt_runtime + or torch.ops.tensorrt.ABI_VERSION() != "5", + "Torch-TensorRT runtime is not available or ABI Version is incompatible", ) @unittest.skipIf( not torch_tensorrt.ENABLED_FEATURES.torch_tensorrt_runtime, diff --git a/third_party/cudnn/local/BUILD b/third_party/cudnn/local/BUILD index d83ac2ec16..2dbe45ad69 100644 --- a/third_party/cudnn/local/BUILD +++ b/third_party/cudnn/local/BUILD @@ -1,4 +1,4 @@ -load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") +load("@rules_cc//cc:defs.bzl", "cc_library") package(default_visibility = ["//visibility:public"]) @@ -35,13 +35,13 @@ cc_library( visibility = ["//visibility:private"], ) -cc_import( +cc_library( name = "cudnn_lib", 
- shared_library = select({ - ":aarch64_linux": "lib/aarch64-linux-gnu/libcudnn.so", - ":ci_rhel_x86_64_linux": "lib64/libcudnn.so", - ":windows": "bin/cudnn64_7.dll", #Need to configure specific version for windows - "//conditions:default": "lib/x86_64-linux-gnu/libcudnn.so", + srcs = select({ + ":aarch64_linux": ["lib/aarch64-linux-gnu/libcudnn.so"], + ":ci_rhel_x86_64_linux": ["lib64/libcudnn.so"], + ":windows": ["lib/x64/cudnn.lib"], + "//conditions:default": ["lib/x86_64-linux-gnu/libcudnn.so"], }), visibility = ["//visibility:private"], ) diff --git a/third_party/libtorch/BUILD b/third_party/libtorch/BUILD index 1284bba477..46de7dd8d0 100644 --- a/third_party/libtorch/BUILD +++ b/third_party/libtorch/BUILD @@ -23,7 +23,6 @@ cc_library( "lib/torch.lib", "lib/torch_cpu.lib", "lib/torch_cuda.lib", - "lib/torch_global_deps.dll", ], "//conditions:default": [ "lib/libtorch.so", diff --git a/third_party/tensorrt/local/BUILD b/third_party/tensorrt/local/BUILD index c317e16688..ef9ff7b956 100644 --- a/third_party/tensorrt/local/BUILD +++ b/third_party/tensorrt/local/BUILD @@ -330,7 +330,7 @@ cc_library( srcs = select({ ":aarch64_linux": ["lib/aarch64-linux-gnu/libnvinfer_plugin.so"], ":ci_rhel_x86_64_linux": ["lib64/libnvinfer_plugin.so"], - ":windows": ["lib/nvinfer_plugin.dll"], + ":windows": ["lib/nvinfer_plugin.lib"], "//conditions:default": ["lib/x86_64-linux-gnu/libnvinfer_plugin.so"], }), hdrs = select({ diff --git a/toolchains/ci_workspaces/WORKSPACE.win.release.tmpl b/toolchains/ci_workspaces/WORKSPACE.win.release.tmpl new file mode 100644 index 0000000000..acc4ecd51c --- /dev/null +++ b/toolchains/ci_workspaces/WORKSPACE.win.release.tmpl @@ -0,0 +1,101 @@ +workspace(name = "Torch-TensorRT") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "rules_python", + sha256 = "863ba0fa944319f7e3d695711427d9ad80ba92c6edd0b7c7443b84e904689539", + strip_prefix = "rules_python-0.22.0", + url = 
"https://github.com/bazelbuild/rules_python/releases/download/0.22.0/rules_python-0.22.0.tar.gz", +) + +load("@rules_python//python:repositories.bzl", "py_repositories") + +py_repositories() + +http_archive( + name = "rules_pkg", + sha256 = "8f9ee2dc10c1ae514ee599a8b42ed99fa262b757058f65ad3c384289ff70c4b8", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.9.1/rules_pkg-0.9.1.tar.gz", + "https://github.com/bazelbuild/rules_pkg/releases/download/0.9.1/rules_pkg-0.9.1.tar.gz", + ], +) + +load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies") + +rules_pkg_dependencies() + +http_archive( + name = "googletest", + sha256 = "755f9a39bc7205f5a0c428e920ddad092c33c8a1b46997def3f1d4a82aded6e1", + strip_prefix = "googletest-5ab508a01f9eb089207ee87fd547d290da39d015", + urls = ["https://github.com/google/googletest/archive/5ab508a01f9eb089207ee87fd547d290da39d015.zip"], +) + +# External dependency for torch_tensorrt if you already have precompiled binaries. 
+local_repository( + name = "torch_tensorrt", + path = "/opt/circleci/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch_tensorrt" +) + +# CUDA should be installed on the system locally +new_local_repository( + name = "cuda", + build_file = "@//third_party/cuda:BUILD", + path = "${CUDA_HOME}", +) + +new_local_repository( + name = "cublas", + build_file = "@//third_party/cublas:BUILD", + path = "C:/", +) +############################################################################################################# +# Tarballs and fetched dependencies (default - use in cases when building from precompiled bin and tarballs) +############################################################################################################# + +http_archive( + name = "libtorch", + build_file = "@//third_party/libtorch:BUILD", + strip_prefix = "libtorch", + urls = ["https://download.pytorch.org/libtorch/nightly/cu121/libtorch-cxx11-abi-shared-with-deps-latest.zip"], +) + +http_archive( + name = "libtorch_pre_cxx11_abi", + build_file = "@//third_party/libtorch:BUILD", + strip_prefix = "libtorch", + urls = ["https://download.pytorch.org/libtorch/nightly/cu121/libtorch-shared-with-deps-latest.zip"], +) + +#################################################################################### +# Locally installed dependencies (use in cases of custom dependencies or aarch64) +#################################################################################### + +new_local_repository( + name = "cudnn", + path = "C:/", + build_file = "@//third_party/cudnn/local:BUILD" +) + +new_local_repository( + name = "tensorrt", + path = "C:/", + build_file = "@//third_party/tensorrt/local:BUILD" +) + +# ######################################################################### +# # Testing Dependencies (optional - comment out on aarch64) +# ######################################################################### + +load("@rules_python//python:pip.bzl", "pip_parse") + +pip_parse( + name = 
"devtools_deps", + requirements = "//:requirements-dev.txt", +) + +load("@devtools_deps//:requirements.bzl", "install_deps") + +install_deps()