Skip to content

feat: Add live progress monitoring to the engine building phase #3087

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/build-test-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH
pushd .
cd tests/modules
Expand Down Expand Up @@ -112,6 +113,7 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/
Expand Down Expand Up @@ -140,6 +142,7 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/
Expand Down Expand Up @@ -168,6 +171,7 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py
Expand Down Expand Up @@ -196,6 +200,7 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/
Expand Down Expand Up @@ -226,6 +231,7 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/
Expand Down Expand Up @@ -256,6 +262,7 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
nvidia-smi
Expand Down Expand Up @@ -286,6 +293,7 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/core
python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml .
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/build-test-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ jobs:
pre-script: packaging/driver_upgrade.bat
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/modules
python hub.py
Expand Down Expand Up @@ -114,6 +115,7 @@ jobs:
pre-script: packaging/driver_upgrade.bat
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 10 conversion/
Expand All @@ -139,6 +141,7 @@ jobs:
pre-script: packaging/driver_upgrade.bat
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/
Expand All @@ -164,6 +167,7 @@ jobs:
pre-script: packaging/driver_upgrade.bat
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py
Expand All @@ -189,6 +193,7 @@ jobs:
pre-script: packaging/driver_upgrade.bat
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/
Expand Down Expand Up @@ -216,6 +221,7 @@ jobs:
pre-script: packaging/driver_upgrade.bat
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/
Expand Down Expand Up @@ -246,6 +252,7 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py
Expand All @@ -272,6 +279,7 @@ jobs:
pre-script: packaging/driver_upgrade.bat
script: |
export USE_HOST_DEPS=1
export CI_BUILD=1
pushd .
cd tests/py/core
python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml .
Expand Down
159 changes: 159 additions & 0 deletions py/torch_tensorrt/dynamo/conversion/_TRTBuilderMonitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import os
import sys
from typing import Any, Dict, Optional

import tensorrt as trt


class _ASCIIMonitor(trt.IProgressMonitor):  # type: ignore
    """Plain-text TensorRT build progress monitor.

    Renders one ANSI progress bar per active build phase, indenting nested
    phases under their parents. Rendering is suppressed entirely when the
    ``CI_BUILD`` environment variable is set to ``"1"`` (CI logs would
    otherwise be flooded with cursor-movement escape codes).
    """

    def __init__(self, engine_name: str = "") -> None:
        # engine_name is accepted for interface parity with _RichMonitor but
        # is currently unused.
        trt.IProgressMonitor.__init__(self)
        # phase_name -> {"title", "steps", "num_steps", "nbIndents"}
        self._active_phases: Dict[str, Dict[str, Any]] = {}
        # Returned from step_complete(); setting it False asks TensorRT to
        # cancel the build.
        self._step_result = True

        # Disable terminal rendering in CI environments (CI_BUILD=1).
        self._render = os.environ.get("CI_BUILD") != "1"

    def phase_start(
        self, phase_name: str, parent_phase: Optional[str], num_steps: int
    ) -> None:
        """TensorRT callback: a new build phase has begun."""
        try:
            if parent_phase is not None:
                nbIndents = 1 + self._active_phases[parent_phase]["nbIndents"]
            else:
                nbIndents = 0
            self._active_phases[phase_name] = {
                "title": phase_name,
                "steps": 0,
                "num_steps": num_steps,
                "nbIndents": nbIndents,
            }
            self._redraw()
        except KeyboardInterrupt:
            # phase_start cannot cancel the build directly; record the request
            # so the next step_complete() returns False.
            # (Bug fix: previously assigned to a local `_step_result`, which
            # silently discarded the cancellation request.)
            self._step_result = False

    def phase_finish(self, phase_name: str) -> None:
        """TensorRT callback: a build phase has completed."""
        try:
            del self._active_phases[phase_name]
            self._redraw(blank_lines=1)  # Clear the removed phase.
        except KeyboardInterrupt:
            # See phase_start: defer cancellation to step_complete().
            self._step_result = False

    def step_complete(self, phase_name: str, step: int) -> bool:
        """TensorRT callback: a step within a phase finished.

        Returns False to request build cancellation.
        """
        try:
            self._active_phases[phase_name]["steps"] = step
            self._redraw()
            return self._step_result
        except KeyboardInterrupt:
            # Returning False here cancels the build cleanly; no need to
            # propagate the exception into TensorRT.
            return False

    def _redraw(self, *, blank_lines: int = 0) -> None:
        """Repaint all active phase bars in place using ANSI escapes."""
        if not self._render:
            return

        def clear_line() -> None:
            print("\x1B[2K", end="")

        def move_to_start_of_line() -> None:
            print("\x1B[0G", end="")

        def move_cursor_up(lines: int) -> None:
            print("\x1B[{}A".format(lines), end="")

        def progress_bar(steps: int, num_steps: int) -> str:
            INNER_WIDTH = 10
            # Guard against phases that report zero total steps
            # (would otherwise raise ZeroDivisionError).
            if num_steps <= 0:
                completed_bar_chars = INNER_WIDTH
            else:
                completed_bar_chars = int(INNER_WIDTH * steps / float(num_steps))
            return "[{}{}]".format(
                "=" * completed_bar_chars, "-" * (INNER_WIDTH - completed_bar_chars)
            )

        # Set max_cols to a default of 200 if not run in interactive mode.
        max_cols = os.get_terminal_size().columns if sys.stdout.isatty() else 200

        move_to_start_of_line()
        for phase in self._active_phases.values():
            phase_prefix = "{indent}{bar} {title}".format(
                indent=" " * phase["nbIndents"],
                bar=progress_bar(phase["steps"], phase["num_steps"]),
                title=phase["title"],
            )
            phase_suffix = "{steps}/{num_steps}".format(**phase)
            allowable_prefix_chars = max_cols - len(phase_suffix) - 2
            if allowable_prefix_chars < len(phase_prefix):
                # Truncate with an ellipsis so prefix + suffix fit on one line.
                phase_prefix = phase_prefix[0 : allowable_prefix_chars - 3] + "..."
            clear_line()
            print(phase_prefix, phase_suffix)
        for _ in range(blank_lines):
            clear_line()
            print()
        # Move the cursor back up so the next redraw overwrites in place.
        move_cursor_up(len(self._active_phases) + blank_lines)
        sys.stdout.flush()


try:
    from rich.progress import BarColumn, Progress, TaskID, TextColumn, TimeElapsedColumn

    class _RichMonitor(trt.IProgressMonitor):  # type: ignore
        """TensorRT build progress monitor backed by the ``rich`` library.

        Shows one live task row per active build phase (elapsed time, bar,
        percentage). Rendering is suppressed when the ``CI_BUILD`` environment
        variable is set to ``"1"``.
        """

        def __init__(self, engine_name: str = "") -> None:
            # engine_name is accepted for interface parity with _ASCIIMonitor
            # but is currently unused.
            trt.IProgressMonitor.__init__(self)
            # phase_name -> rich TaskID
            self._active_phases: Dict[str, TaskID] = {}
            # Returned from step_complete(); False asks TensorRT to cancel.
            self._step_result = True

            self._progress_monitors = Progress(
                TextColumn(" "),
                TimeElapsedColumn(),
                TextColumn("{task.description}: "),
                BarColumn(),
                TextColumn(" {task.percentage:.0f}% ({task.completed}/{task.total})"),
            )

            # Disable live rendering in CI environments (CI_BUILD=1).
            self._render = os.environ.get("CI_BUILD") != "1"

            if self._render:
                self._progress_monitors.start()

        def phase_start(
            self, phase_name: str, parent_phase: Optional[str], num_steps: int
        ) -> None:
            """TensorRT callback: a new build phase has begun."""
            try:
                self._active_phases[phase_name] = self._progress_monitors.add_task(
                    phase_name, total=num_steps
                )
                self._progress_monitors.refresh()
            except KeyboardInterrupt:
                # The phase_start callback cannot directly cancel the build, so request the cancellation from within step_complete.
                # (Bug fix: previously assigned to a local `_step_result`,
                # which silently discarded the cancellation request.)
                self._step_result = False

        def phase_finish(self, phase_name: str) -> None:
            """TensorRT callback: a build phase has completed."""
            try:
                task_id = self._active_phases[phase_name]
                self._progress_monitors.update(task_id, visible=False)
                self._progress_monitors.stop_task(task_id)
                self._progress_monitors.remove_task(task_id)
                self._progress_monitors.refresh()
            except KeyboardInterrupt:
                # See phase_start: defer cancellation to step_complete().
                self._step_result = False

        def step_complete(self, phase_name: str, step: int) -> bool:
            """TensorRT callback: a step within a phase finished.

            Returns False to request build cancellation.
            """
            try:
                self._progress_monitors.update(
                    self._active_phases[phase_name], completed=step
                )
                self._progress_monitors.refresh()
                return self._step_result
            except KeyboardInterrupt:
                # There is no need to propagate this exception to TensorRT. We can simply cancel the build.
                return False

        def __del__(self) -> None:
            # getattr guard: __del__ may run even if __init__ raised before
            # _progress_monitors was assigned.
            monitors = getattr(self, "_progress_monitors", None)
            if monitors:
                monitors.stop()

    # NOTE(review): name is misspelled ("Bulder") but is imported elsewhere
    # under this exact name, so it is preserved for backward compatibility.
    TRTBulderMonitor: trt.IProgressMonitor = _RichMonitor
except ImportError:
    TRTBulderMonitor: trt.IProgressMonitor = _ASCIIMonitor  # type: ignore[no-redef]
35 changes: 15 additions & 20 deletions py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import Any, Callable, Dict, List, NamedTuple, Optional, Sequence, Set, Tuple

import numpy as np
import tensorrt as trt
import torch
import torch.fx
from torch.fx.node import _get_qualified_name
Expand All @@ -21,6 +20,7 @@
DYNAMO_CONVERTERS as CONVERTERS,
)
from torch_tensorrt.dynamo.conversion._ConverterRegistry import CallingConvention
from torch_tensorrt.dynamo.conversion._TRTBuilderMonitor import TRTBulderMonitor
from torch_tensorrt.dynamo.conversion.converter_utils import (
get_node_io,
get_node_name,
Expand All @@ -30,6 +30,7 @@
from torch_tensorrt.fx.observer import Observer
from torch_tensorrt.logging import TRT_LOGGER

import tensorrt as trt
from packaging import version

_LOGGER: logging.Logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -146,7 +147,7 @@ def clean_repr(x: Any, depth: int = 0) -> Any:
else:
return "(...)"
else:
return x
return f"{x} <{type(x).__name__}>"

str_args = [clean_repr(a) for a in args]
return repr(tuple(str_args))
Expand Down Expand Up @@ -176,6 +177,10 @@ def _populate_trt_builder_config(
) -> trt.IBuilderConfig:

builder_config = self.builder.create_builder_config()

if self.compilation_settings.debug:
builder_config.progress_monitor = TRTBulderMonitor()

if self.compilation_settings.workspace_size != 0:
builder_config.set_memory_pool_limit(
trt.MemoryPoolType.WORKSPACE, self.compilation_settings.workspace_size
Expand Down Expand Up @@ -516,18 +521,18 @@ def run_node(self, n: torch.fx.Node) -> torch.fx.Node:
kwargs["_itensor_to_tensor_meta"] = self._itensor_to_tensor_meta
n.kwargs = kwargs

# run the node
_LOGGER.debug(
f"Running node {self._cur_node_name}, a {self._cur_node.op} node "
f"with target {self._cur_node.target} in the TensorRT Interpreter"
)
if _LOGGER.isEnabledFor(logging.DEBUG):
_LOGGER.debug(
f"Converting node {self._cur_node_name} (kind: {n.target}, args: {TRTInterpreter._args_str(n.args)})"
)

trt_node: torch.fx.Node = super().run_node(n)

if n.op == "get_attr":
self.const_mapping[str(n)] = (tuple(trt_node.shape), str(trt_node.dtype))

_LOGGER.debug(
f"Ran node {self._cur_node_name} with properties: {get_node_io(n, self.const_mapping)}"
_LOGGER.info(
f"Converted node {self._cur_node_name} [{n.target}] ({get_node_io(n, self.const_mapping)})"
)

# remove "_itensor_to_tensor_meta"
Expand Down Expand Up @@ -611,9 +616,7 @@ def call_module(
converter, calling_convention = converter_packet

assert self._cur_node_name is not None
_LOGGER.debug(
f"Converting node {self._cur_node_name} (kind: {target}, args: {TRTInterpreter._args_str(args)})"
)

if calling_convention is CallingConvention.LEGACY:
return converter(self.ctx.net, submod, args, kwargs, self._cur_node_name)
else:
Expand All @@ -629,10 +632,6 @@ def call_function(self, target: str, args: Any, kwargs: Any) -> Any:

converter, calling_convention = converter_packet

assert self._cur_node_name is not None
_LOGGER.debug(
f"Converting node {self._cur_node_name} (kind: {target}, args: {TRTInterpreter._args_str(args)})"
)
if calling_convention is CallingConvention.LEGACY:
return converter(self.ctx.net, target, args, kwargs, self._cur_node_name)
else:
Expand Down Expand Up @@ -663,10 +662,6 @@ def call_method(self, target: str, args: Any, kwargs: Any) -> Any:
)
converter, calling_convention = converter_packet

assert self._cur_node_name is not None
_LOGGER.debug(
f"Converting node {self._cur_node_name} (kind: {target}, args: {TRTInterpreter._args_str(args)})"
)
if calling_convention is CallingConvention.LEGACY:
return converter(self.ctx.net, target, args, kwargs, self._cur_node_name)
else:
Expand Down
Loading
Loading