Return predictions in fp32 on CPU (#219)

abdulfatir · Abdul Fatir Ansari · web-flow · commit 4c43cfbdac9f · 2024-11-29T16:54:21.000+01:00
*Issue #, if available:* N/A

*Description of changes:* This PR ensures that predictions are returned
in FP32 and on the CPU device. This choice is now better because we have
two types of models which have different types of forecasts (samples vs.
quantiles). Furthermore, `int64` input_type (our README example is one
such case) ran into issues with `predict_quantiles` before. This choice
also fixes that.


By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.

---------

Co-authored-by: Abdul Fatir Ansari &lt;ansarnd@amazon.de&gt;
diff --git a/src/chronos/base.py b/src/chronos/base.py
@@ -67,7 +67,8 @@ def predict(
         **kwargs,
     ):
         """
-        Get forecasts for the given time series.
+        Get forecasts for the given time series. Predictions will be
+        returned in fp32 on the cpu.
 
         Parameters
         ----------
@@ -97,6 +98,7 @@ def predict_quantiles(
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         """
         Get quantile and mean forecasts for given time series.
+        Predictions will be returned in fp32 on the cpu.
 
         Parameters
         ----------
diff --git a/src/chronos/chronos.py b/src/chronos/chronos.py
@@ -500,9 +500,6 @@ def predict(  # type: ignore[override]
                 raise ValueError(msg)
             logger.warning(msg)
 
-        input_dtype = context_tensor.dtype
-        input_device = context_tensor.device
-
         predictions = []
         remaining = prediction_length
 
@@ -533,7 +530,7 @@ def predict(  # type: ignore[override]
                 [context_tensor, prediction.median(dim=1).values], dim=-1
             )
 
-        return torch.cat(predictions, dim=-1).to(dtype=input_dtype, device=input_device)
+        return torch.cat(predictions, dim=-1).to(dtype=torch.float32, device="cpu")
 
     def predict_quantiles(
         self,
diff --git a/src/chronos/chronos_bolt.py b/src/chronos/chronos_bolt.py
@@ -487,13 +487,14 @@ def predict(  # type: ignore[override]
         # TODO: We unroll the forecast of Chronos Bolt greedily with the full forecast
         # horizon that the model was trained with (i.e., 64). This results in variance collapsing
         # every 64 steps.
+        context_tensor = context_tensor.to(
+            device=self.model.device,
+            dtype=torch.float32,
+        )
         while remaining > 0:
             with torch.no_grad():
                 prediction = self.model(
-                    context=context_tensor.to(
-                        device=self.model.device,
-                        dtype=torch.float32,  # scaling should be done in 32-bit precision
-                    ),
+                    context=context_tensor,
                 ).quantile_preds.to(context_tensor)
 
             predictions.append(prediction)
@@ -507,7 +508,9 @@ def predict(  # type: ignore[override]
 
             context_tensor = torch.cat([context_tensor, central_prediction], dim=-1)
 
-        return torch.cat(predictions, dim=-1)[..., :prediction_length]
+        return torch.cat(predictions, dim=-1)[..., :prediction_length].to(
+            dtype=torch.float32, device="cpu"
+        )
 
     def predict_quantiles(
         self,
diff --git a/test/__init__.py b/test/__init__.py
@@ -0,0 +1,2 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/test/test_chronos.py b/test/test_chronos.py
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from pathlib import Path
-from typing import Optional, Tuple
 
 import pytest
 import torch
@@ -13,6 +12,7 @@
     ChronosPipeline,
     MeanScaleUniformBins,
 )
+from test.util import validate_tensor
 
 
 def test_base_chronos_pipeline_loads_from_huggingface():
@@ -166,30 +166,21 @@ def test_tokenizer_random_data(use_eos_token: bool):
     assert samples.shape == (2, 10, 4)
 
 
-def validate_tensor(
-    a: torch.Tensor, shape: Tuple[int, ...], dtype: Optional[torch.dtype] = None
-) -> None:
-    assert isinstance(a, torch.Tensor)
-    assert a.shape == shape
-
-    if dtype is not None:
-        assert a.dtype == dtype
-
-
 @pytest.mark.parametrize("model_dtype", [torch.float32, torch.bfloat16])
-@pytest.mark.parametrize("input_dtype", [torch.float32, torch.bfloat16])
+@pytest.mark.parametrize("input_dtype", [torch.float32, torch.bfloat16, torch.int64])
 def test_pipeline_predict(model_dtype: torch.dtype, input_dtype: torch.dtype):
     pipeline = ChronosPipeline.from_pretrained(
         Path(__file__).parent / "dummy-chronos-model",
         device_map="cpu",
         torch_dtype=model_dtype,
     )
-    context = 10 * torch.rand(size=(4, 16), dtype=input_dtype) + 10
+    context = 10 * torch.rand(size=(4, 16)) + 10
+    context = context.to(dtype=input_dtype)
 
     # input: tensor of shape (batch_size, context_length)
 
     samples = pipeline.predict(context, num_samples=12, prediction_length=3)
-    validate_tensor(samples, shape=(4, 12, 3), dtype=input_dtype)
+    validate_tensor(samples, shape=(4, 12, 3), dtype=torch.float32)
 
     with pytest.raises(ValueError):
         samples = pipeline.predict(
@@ -199,12 +190,12 @@ def test_pipeline_predict(model_dtype: torch.dtype, input_dtype: torch.dtype):
     samples = pipeline.predict(
         context, num_samples=7, prediction_length=65, limit_prediction_length=False
     )
-    validate_tensor(samples, shape=(4, 7, 65), dtype=input_dtype)
+    validate_tensor(samples, shape=(4, 7, 65), dtype=torch.float32)
 
     # input: batch_size-long list of tensors of shape (context_length,)
 
     samples = pipeline.predict(list(context), num_samples=12, prediction_length=3)
-    validate_tensor(samples, shape=(4, 12, 3), dtype=input_dtype)
+    validate_tensor(samples, shape=(4, 12, 3), dtype=torch.float32)
 
     with pytest.raises(ValueError):
         samples = pipeline.predict(
@@ -220,12 +211,12 @@ def test_pipeline_predict(model_dtype: torch.dtype, input_dtype: torch.dtype):
         prediction_length=65,
         limit_prediction_length=False,
     )
-    validate_tensor(samples, shape=(4, 7, 65), dtype=input_dtype)
+    validate_tensor(samples, shape=(4, 7, 65), dtype=torch.float32)
 
     # input: tensor of shape (context_length,)
 
     samples = pipeline.predict(context[0, ...], num_samples=12, prediction_length=3)
-    validate_tensor(samples, shape=(1, 12, 3), dtype=input_dtype)
+    validate_tensor(samples, shape=(1, 12, 3), dtype=torch.float32)
 
     with pytest.raises(ValueError):
         samples = pipeline.predict(
@@ -240,16 +231,18 @@ def test_pipeline_predict(model_dtype: torch.dtype, input_dtype: torch.dtype):
         num_samples=7,
         prediction_length=65,
     )
-    validate_tensor(samples, shape=(1, 7, 65), dtype=input_dtype)
+    validate_tensor(samples, shape=(1, 7, 65), dtype=torch.float32)
 
 
 @pytest.mark.parametrize("model_dtype", [torch.float32, torch.bfloat16])
+@pytest.mark.parametrize("input_dtype", [torch.float32, torch.bfloat16, torch.int64])
 @pytest.mark.parametrize("prediction_length", [3, 65])
 @pytest.mark.parametrize(
     "quantile_levels", [[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], [0.1, 0.5, 0.9]]
 )
 def test_pipeline_predict_quantiles(
     model_dtype: torch.dtype,
+    input_dtype: torch.dtype,
     prediction_length: int,
     quantile_levels: list[int],
 ):
@@ -259,6 +252,7 @@ def test_pipeline_predict_quantiles(
         torch_dtype=model_dtype,
     )
     context = 10 * torch.rand(size=(4, 16)) + 10
+    context = context.to(dtype=input_dtype)
 
     num_expected_quantiles = len(quantile_levels)
     # input: tensor of shape (batch_size, context_length)
@@ -269,8 +263,10 @@ def test_pipeline_predict_quantiles(
         prediction_length=prediction_length,
         quantile_levels=quantile_levels,
     )
-    validate_tensor(quantiles, (4, prediction_length, num_expected_quantiles))
-    validate_tensor(mean, (4, prediction_length))
+    validate_tensor(
+        quantiles, (4, prediction_length, num_expected_quantiles), dtype=torch.float32
+    )
+    validate_tensor(mean, (4, prediction_length), dtype=torch.float32)
 
     # input: batch_size-long list of tensors of shape (context_length,)
 
@@ -280,8 +276,10 @@ def test_pipeline_predict_quantiles(
         prediction_length=prediction_length,
         quantile_levels=quantile_levels,
     )
-    validate_tensor(quantiles, (4, prediction_length, num_expected_quantiles))
-    validate_tensor(mean, (4, prediction_length))
+    validate_tensor(
+        quantiles, (4, prediction_length, num_expected_quantiles), dtype=torch.float32
+    )
+    validate_tensor(mean, (4, prediction_length), dtype=torch.float32)
 
     # input: tensor of shape (context_length,)
 
@@ -291,20 +289,23 @@ def test_pipeline_predict_quantiles(
         prediction_length=prediction_length,
         quantile_levels=quantile_levels,
     )
-    validate_tensor(quantiles, (1, prediction_length, num_expected_quantiles))
-    validate_tensor(mean, (1, prediction_length))
+    validate_tensor(
+        quantiles, (1, prediction_length, num_expected_quantiles), dtype=torch.float32
+    )
+    validate_tensor(mean, (1, prediction_length), dtype=torch.float32)
 
 
 @pytest.mark.parametrize("model_dtype", [torch.float32, torch.bfloat16])
-@pytest.mark.parametrize("input_dtype", [torch.float32, torch.bfloat16])
+@pytest.mark.parametrize("input_dtype", [torch.float32, torch.bfloat16, torch.int64])
 def test_pipeline_embed(model_dtype: torch.dtype, input_dtype: torch.dtype):
     pipeline = ChronosPipeline.from_pretrained(
         Path(__file__).parent / "dummy-chronos-model",
         device_map="cpu",
         torch_dtype=model_dtype,
     )
     d_model = pipeline.model.model.config.d_model
-    context = 10 * torch.rand(size=(4, 16), dtype=input_dtype) + 10
+    context = 10 * torch.rand(size=(4, 16)) + 10
+    context = context.to(dtype=input_dtype)
     expected_embed_length = 16 + (1 if pipeline.model.config.use_eos_token else 0)
 
     # input: tensor of shape (batch_size, context_length)
diff --git a/test/test_chronos_bolt.py b/test/test_chronos_bolt.py
@@ -1,36 +1,36 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
 from pathlib import Path
-from typing import Tuple
 
 import pytest
 import torch
 
 from chronos import BaseChronosPipeline, ChronosBoltPipeline
 from chronos.chronos_bolt import InstanceNorm, Patch
-
-
-def validate_tensor(input: torch.Tensor, shape: Tuple[int, ...]) -> None:
-    assert isinstance(input, torch.Tensor)
-    assert input.shape == shape
+from test.util import validate_tensor
 
 
 def test_base_chronos_pipeline_loads_from_huggingface():
     BaseChronosPipeline.from_pretrained("amazon/chronos-bolt-tiny", device_map="cpu")
 
 
 @pytest.mark.parametrize("torch_dtype", [torch.float32, torch.bfloat16])
-def test_pipeline_predict(torch_dtype: str):
+@pytest.mark.parametrize("input_dtype", [torch.float32, torch.bfloat16, torch.int64])
+def test_pipeline_predict(torch_dtype: torch.dtype, input_dtype: torch.dtype):
     pipeline = ChronosBoltPipeline.from_pretrained(
         Path(__file__).parent / "dummy-chronos-bolt-model",
         device_map="cpu",
         torch_dtype=torch_dtype,
     )
     context = 10 * torch.rand(size=(4, 16)) + 10
+    context = context.to(dtype=input_dtype)
     expected_num_quantiles = len(pipeline.quantiles)
 
     # input: tensor of shape (batch_size, context_length)
 
     quantiles = pipeline.predict(context, prediction_length=3)
-    validate_tensor(quantiles, (4, expected_num_quantiles, 3))
+    validate_tensor(quantiles, (4, expected_num_quantiles, 3), dtype=torch.float32)
 
     with pytest.raises(ValueError):
         quantiles = pipeline.predict(
@@ -43,7 +43,7 @@ def test_pipeline_predict(torch_dtype: str):
     # input: batch_size-long list of tensors of shape (context_length,)
 
     quantiles = pipeline.predict(list(context), prediction_length=3)
-    validate_tensor(quantiles, (4, expected_num_quantiles, 3))
+    validate_tensor(quantiles, (4, expected_num_quantiles, 3), dtype=torch.float32)
 
     with pytest.raises(ValueError):
         quantiles = pipeline.predict(
@@ -53,12 +53,12 @@ def test_pipeline_predict(torch_dtype: str):
         )
 
     quantiles = pipeline.predict(list(context), prediction_length=65)
-    validate_tensor(quantiles, (4, expected_num_quantiles, 65))
+    validate_tensor(quantiles, (4, expected_num_quantiles, 65), dtype=torch.float32)
 
     # input: tensor of shape (context_length,)
 
     quantiles = pipeline.predict(context[0, ...], prediction_length=3)
-    validate_tensor(quantiles, (1, expected_num_quantiles, 3))
+    validate_tensor(quantiles, (1, expected_num_quantiles, 3), dtype=torch.float32)
 
     with pytest.raises(ValueError):
         quantiles = pipeline.predict(
@@ -71,23 +71,28 @@ def test_pipeline_predict(torch_dtype: str):
         context[0, ...],
         prediction_length=65,
     )
-    validate_tensor(quantiles, (1, expected_num_quantiles, 65))
+    validate_tensor(quantiles, (1, expected_num_quantiles, 65), dtype=torch.float32)
 
 
 @pytest.mark.parametrize("torch_dtype", [torch.float32, torch.bfloat16])
+@pytest.mark.parametrize("input_dtype", [torch.float32, torch.bfloat16, torch.int64])
 @pytest.mark.parametrize("prediction_length", [3, 65])
 @pytest.mark.parametrize(
     "quantile_levels", [[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], [0.1, 0.5, 0.9]]
 )
 def test_pipeline_predict_quantiles(
-    torch_dtype: str, prediction_length: int, quantile_levels: list[int]
+    torch_dtype: torch.dtype,
+    input_dtype: torch.dtype,
+    prediction_length: int,
+    quantile_levels: list[int],
 ):
     pipeline = ChronosBoltPipeline.from_pretrained(
         Path(__file__).parent / "dummy-chronos-bolt-model",
         device_map="cpu",
         torch_dtype=torch_dtype,
     )
     context = 10 * torch.rand(size=(4, 16)) + 10
+    context = context.to(dtype=input_dtype)
 
     num_expected_quantiles = len(quantile_levels)
     # input: tensor of shape (batch_size, context_length)
@@ -97,8 +102,10 @@ def test_pipeline_predict_quantiles(
         prediction_length=prediction_length,
         quantile_levels=quantile_levels,
     )
-    validate_tensor(quantiles, (4, prediction_length, num_expected_quantiles))
-    validate_tensor(mean, (4, prediction_length))
+    validate_tensor(
+        quantiles, (4, prediction_length, num_expected_quantiles), dtype=torch.float32
+    )
+    validate_tensor(mean, (4, prediction_length), dtype=torch.float32)
 
     # input: batch_size-long list of tensors of shape (context_length,)
 
@@ -107,8 +114,10 @@ def test_pipeline_predict_quantiles(
         prediction_length=prediction_length,
         quantile_levels=quantile_levels,
     )
-    validate_tensor(quantiles, (4, prediction_length, num_expected_quantiles))
-    validate_tensor(mean, (4, prediction_length))
+    validate_tensor(
+        quantiles, (4, prediction_length, num_expected_quantiles), dtype=torch.float32
+    )
+    validate_tensor(mean, (4, prediction_length), dtype=torch.float32)
 
     # input: tensor of shape (context_length,)
 
@@ -117,8 +126,10 @@ def test_pipeline_predict_quantiles(
         prediction_length=prediction_length,
         quantile_levels=quantile_levels,
     )
-    validate_tensor(quantiles, (1, prediction_length, num_expected_quantiles))
-    validate_tensor(mean, (1, prediction_length))
+    validate_tensor(
+        quantiles, (1, prediction_length, num_expected_quantiles), dtype=torch.float32
+    )
+    validate_tensor(mean, (1, prediction_length), dtype=torch.float32)
 
 
 # The following tests have been taken from
diff --git a/test/util.py b/test/util.py
@@ -0,0 +1,13 @@
+from typing import Optional, Tuple
+
+import torch
+
+
+def validate_tensor(
+    a: torch.Tensor, shape: Tuple[int, ...], dtype: Optional[torch.dtype] = None
+) -> None:
+    assert isinstance(a, torch.Tensor)
+    assert a.shape == shape
+
+    if dtype is not None:
+        assert a.dtype == dtype

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.`
	`2`	`+# SPDX-License-Identifier: Apache-2.0`