Skip to content

Fix: Revert conv/deconv to use get_trt_weights for refit to record weights #3574

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 34 additions & 28 deletions py/torch_tensorrt/dynamo/conversion/converter_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ def create_constant(
dtype: Optional[Union[torch.dtype, np.dtype, TRTDataType, _enums.dtype]],
min_rank: Optional[int] = 1,
target_quantized_type: Optional[TRTDataType] = None,
return_trt_weights: Optional[bool] = False,
) -> TRTTensor:
"""
Add a TensorRT constant layer whose value is `value` to `ctx.net`.
Expand All @@ -371,6 +372,7 @@ def create_constant(
If a dtype is given, we will convert the type of the given `value` to this dtype.
min_rank (int): minimum rank of the constant tensor.
target_quantized_type (Optional[TRTDataType]): If a quantized type is given, we will convert the type of the given `value` to this dtype.
return_trt_weights (Optional[bool]): If True, return a TensorRT Weights object instead of a TensorRT ITensor.
Returns:
A TensorRT ITensor that represents the given value, or a TensorRT Weights object if `return_trt_weights` is True.
"""
Expand All @@ -393,8 +395,8 @@ def create_constant(
else:
shape = list(torch_value.shape)

cpu_weight_reference_key: str = name
if torch_value is not None:

if torch_value.dtype == torch.uint8:
if (
target_quantized_type is None
Expand All @@ -405,41 +407,38 @@ def create_constant(
"Currently supported target_quantized_type for uint8 is FP4, got {target_quantized_type=}"
)
shape[-1] = shape[-1] * 2
weights = trt.Weights(
trt_weights = trt.Weights(
type=trt.DataType.FP4,
ptr=torch_value.data_ptr(),
count=torch_value.numel() * 2,
)
constant = ctx.net.add_constant(
shape,
weights,
)
constant.name = name
ctx.cpu_weights_reference_holder[name + " FP4_CONSTANT"] = torch_value
return constant.get_output(0)

# TODO: Refit map uses numpy arrays. Remove this once refit is updated to use torch.Tensor
if torch_value.dtype == torch.bfloat16:
torch_value_fp32 = torch_value.to(torch.float32)
numpy_value = torch_value_fp32.numpy()
cpu_weight_reference_key = name + " FP4_CONSTANT"
else:
numpy_value = torch_value.numpy()
# TODO: Refit map uses numpy arrays. Remove this once refit is updated to use torch.Tensor
if torch_value.dtype == torch.bfloat16:
torch_value_fp32 = torch_value.to(torch.float32)
numpy_value = torch_value_fp32.numpy()
else:
numpy_value = torch_value.numpy()

# Used for refit
ctx.weight_refit_map[name + " CONSTANT"] = numpy_value.reshape(-1)
# Used for refit
ctx.weight_refit_map[name + " CONSTANT"] = numpy_value.reshape(-1)

# This buffer holds a reference to the torch.Tensor so that it stays alive during the course of TRT compilation.
ctx.cpu_weights_reference_holder[name] = torch_value
# Convert the torch.Tensor to a trt.Weights object
trt_weights = to_trt_weights(torch_value)

# Convert the torch.Tensor to a trt.Weights object
trt_weights = to_trt_weights(torch_value)
constant = ctx.net.add_constant(
shape,
trt_weights,
)
constant.name = name
# This buffer holds a reference to the torch.Tensor so that it stays alive during the course of TRT compilation.
ctx.cpu_weights_reference_holder[cpu_weight_reference_key] = torch_value

return constant.get_output(0)
if return_trt_weights:
return trt_weights
else:
constant = ctx.net.add_constant(
shape,
trt_weights,
)
constant.name = name
return constant.get_output(0)
else:
raise ValueError(
f"Cannot convert tensor '{name}' to a TensorRT constant because its value is None."
Expand All @@ -453,6 +452,7 @@ def get_trt_tensor(
dtype: Optional[Union[torch.dtype, np.dtype, TRTDataType, _enums.dtype]] = None,
min_rank: int = 1,
target_quantized_type: Optional[TRTDataType] = None,
return_trt_weights: Optional[bool] = False,
) -> TRTTensor:
"""
Given a value of arbitrary type, we try to convert it to a TensorRT ITensor.
Expand Down Expand Up @@ -480,7 +480,13 @@ def get_trt_tensor(

if isinstance(input_val, (torch.Tensor, np.ndarray, int, float, bool)):
return create_constant(
ctx, input_val, name, dtype, min_rank, target_quantized_type
ctx,
input_val,
name,
dtype,
min_rank,
target_quantized_type,
return_trt_weights,
)
elif isinstance(input_val, TRTTensor):
return input_val
Expand Down
7 changes: 4 additions & 3 deletions py/torch_tensorrt/dynamo/conversion/impl/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
has_dynamic_shape,
set_layer_name,
to_torch,
to_trt_weights,
)
from torch_tensorrt.fx.converters.converter_utils import (
get_dyn_range,
Expand Down Expand Up @@ -55,7 +54,8 @@ def convNd(
# Process bias terms
if isinstance(bias, (torch.Tensor, np.ndarray)):
bias = to_torch(bias, dtype=input.dtype)
bias = get_trt_tensor(ctx, bias, f"{name}_bias")
# This should return a trt.Weights object
bias = get_trt_tensor(ctx, bias, f"{name}_bias", return_trt_weights=True)

elif isinstance(bias, TRTTensor):
bias = get_trt_tensor(ctx, bias, f"{name}_bias")
Expand Down Expand Up @@ -85,7 +85,8 @@ def convNd(

num_output_maps = weight.shape[0]
kernel_shape = weight.shape[2:]
weight = to_trt_weights(weight)
# This should return a trt.Weights object
weight = get_trt_tensor(ctx, weight, f"{name}_weight", return_trt_weights=True)

else:
raise RuntimeError(
Expand Down
7 changes: 4 additions & 3 deletions py/torch_tensorrt/dynamo/conversion/impl/deconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
get_trt_tensor,
has_dynamic_shape,
to_torch,
to_trt_weights,
)
from torch_tensorrt.fx.converters.converter_utils import (
get_dyn_range,
Expand Down Expand Up @@ -55,7 +54,8 @@ def deconvNd(
if isinstance(bias, (torch.Tensor, np.ndarray)):
# Transform the bias constant into a Numpy array
bias = to_torch(bias, dtype=input.dtype)
bias = get_trt_tensor(ctx, bias, f"{name}_bias")
# This should return a trt.Weights object
bias = get_trt_tensor(ctx, bias, f"{name}_bias", return_trt_weights=True)

elif isinstance(bias, TRTTensor):
bias = get_trt_tensor(ctx, bias, f"{name}_bias")
Expand Down Expand Up @@ -85,7 +85,8 @@ def deconvNd(
weight = torch.unsqueeze(weight, -1)
num_output_maps = weight.shape[1]
kernel_shape = weight.shape[2:]
weight = to_trt_weights(weight)
# This should return a trt.Weights object
weight = get_trt_tensor(ctx, weight, f"{name}_weight", return_trt_weights=True)

else:
raise RuntimeError(
Expand Down
Loading