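address comment: return predictions in the input's dtype and device; cast context to float32 inside _input_transform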

lostella · lostella · commit 3d2ae402ae66 · 2024-11-08T09:34:04.000+01:00
diff --git a/src/chronos/chronos.py b/src/chronos/chronos.py
@@ -169,6 +169,7 @@ def __init__(
     def _input_transform(
         self, context: torch.Tensor, scale: Optional[torch.Tensor] = None
     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        context = context.to(dtype=torch.float32)
         attention_mask = ~torch.isnan(context)
 
         if scale is None:
@@ -370,7 +371,10 @@ def left_pad_and_stack_1D(tensors: List[torch.Tensor]) -> torch.Tensor:
         assert isinstance(c, torch.Tensor)
         assert c.ndim == 1
         padding = torch.full(
-            size=(max_len - len(c),), fill_value=torch.nan, device=c.device
+            size=(max_len - len(c),),
+            fill_value=torch.nan,
+            device=c.device,
+            dtype=c.dtype,
         )
         padded.append(torch.concat((padding, c), dim=-1))
     return torch.stack(padded)
@@ -397,7 +401,7 @@ class ChronosPipeline:
     model: ChronosModel
 
     def _prepare_and_validate_context(
-        self, context: Union[torch.Tensor, List[torch.Tensor]], dtype=torch.float32
+        self, context: Union[torch.Tensor, List[torch.Tensor]]
     ):
         if isinstance(context, list):
             context = left_pad_and_stack_1D(context)
@@ -406,7 +410,7 @@ def _prepare_and_validate_context(
             context = context.unsqueeze(0)
         assert context.ndim == 2
 
-        return context.to(dtype=dtype)
+        return context
 
     @torch.no_grad()
     def embed(
@@ -506,6 +510,9 @@ def predict(
                 raise ValueError(msg)
             warnings.warn(msg)
 
+        input_dtype = context_tensor.dtype
+        input_device = context_tensor.device
+
         predictions = []
         remaining = prediction_length
 
@@ -536,7 +543,7 @@ def predict(
                 [context_tensor, prediction.median(dim=1).values], dim=-1
             )
 
-        return torch.cat(predictions, dim=-1)
+        return torch.cat(predictions, dim=-1).to(dtype=input_dtype, device=input_device)
 
     @classmethod
     def from_pretrained(cls, *args, **kwargs):
diff --git a/test/test_chronos.py b/test/test_chronos.py
@@ -163,32 +163,33 @@ def validate_tensor(a: torch.Tensor, shape: Tuple[int, ...], dtype) -> None:
     assert a.dtype == dtype
 
 
-@pytest.mark.parametrize("torch_dtype", [torch.float32, torch.bfloat16])
-def test_pipeline_predict(torch_dtype: str):
+@pytest.mark.parametrize("model_dtype", [torch.float32, torch.bfloat16])
+@pytest.mark.parametrize("input_dtype", [torch.float32, torch.bfloat16])
+def test_pipeline_predict(model_dtype: torch.dtype, input_dtype: torch.dtype):
     pipeline = ChronosPipeline.from_pretrained(
         Path(__file__).parent / "dummy-chronos-model",
         device_map="cpu",
-        torch_dtype=torch_dtype,
+        torch_dtype=model_dtype,
     )
-    context = 10 * torch.rand(size=(4, 16)) + 10
+    context = 10 * torch.rand(size=(4, 16), dtype=input_dtype) + 10
 
     # input: tensor of shape (batch_size, context_length)
 
     samples = pipeline.predict(context, num_samples=12, prediction_length=3)
-    validate_tensor(samples, shape=(4, 12, 3), dtype=torch.float32)
+    validate_tensor(samples, shape=(4, 12, 3), dtype=input_dtype)
 
     with pytest.raises(ValueError):
         samples = pipeline.predict(context, num_samples=7, prediction_length=65)
 
     samples = pipeline.predict(
         context, num_samples=7, prediction_length=65, limit_prediction_length=False
     )
-    validate_tensor(samples, shape=(4, 7, 65), dtype=torch.float32)
+    validate_tensor(samples, shape=(4, 7, 65), dtype=input_dtype)
 
     # input: batch_size-long list of tensors of shape (context_length,)
 
     samples = pipeline.predict(list(context), num_samples=12, prediction_length=3)
-    validate_tensor(samples, shape=(4, 12, 3), dtype=torch.float32)
+    validate_tensor(samples, shape=(4, 12, 3), dtype=input_dtype)
 
     with pytest.raises(ValueError):
         samples = pipeline.predict(list(context), num_samples=7, prediction_length=65)
@@ -199,12 +200,12 @@ def test_pipeline_predict(torch_dtype: str):
         prediction_length=65,
         limit_prediction_length=False,
     )
-    validate_tensor(samples, shape=(4, 7, 65), dtype=torch.float32)
+    validate_tensor(samples, shape=(4, 7, 65), dtype=input_dtype)
 
     # input: tensor of shape (context_length,)
 
     samples = pipeline.predict(context[0, ...], num_samples=12, prediction_length=3)
-    validate_tensor(samples, shape=(1, 12, 3), dtype=torch.float32)
+    validate_tensor(samples, shape=(1, 12, 3), dtype=input_dtype)
 
     with pytest.raises(ValueError):
         samples = pipeline.predict(context[0, ...], num_samples=7, prediction_length=65)
@@ -215,40 +216,41 @@ def test_pipeline_predict(torch_dtype: str):
         prediction_length=65,
         limit_prediction_length=False,
     )
-    validate_tensor(samples, shape=(1, 7, 65), dtype=torch.float32)
+    validate_tensor(samples, shape=(1, 7, 65), dtype=input_dtype)
 
 
-@pytest.mark.parametrize("torch_dtype", [torch.float32, torch.bfloat16])
-def test_pipeline_embed(torch_dtype: str):
+@pytest.mark.parametrize("model_dtype", [torch.float32, torch.bfloat16])
+@pytest.mark.parametrize("input_dtype", [torch.float32, torch.bfloat16])
+def test_pipeline_embed(model_dtype: torch.dtype, input_dtype: torch.dtype):
     pipeline = ChronosPipeline.from_pretrained(
         Path(__file__).parent / "dummy-chronos-model",
         device_map="cpu",
-        torch_dtype=torch_dtype,
+        torch_dtype=model_dtype,
     )
     d_model = pipeline.model.model.config.d_model
-    context = 10 * torch.rand(size=(4, 16)) + 10
+    context = 10 * torch.rand(size=(4, 16), dtype=input_dtype) + 10
     expected_embed_length = 16 + (1 if pipeline.model.config.use_eos_token else 0)
 
     # input: tensor of shape (batch_size, context_length)
 
     embedding, scale = pipeline.embed(context)
     validate_tensor(
-        embedding, shape=(4, expected_embed_length, d_model), dtype=torch_dtype
+        embedding, shape=(4, expected_embed_length, d_model), dtype=model_dtype
     )
     validate_tensor(scale, shape=(4,), dtype=torch.float32)
 
     # input: batch_size-long list of tensors of shape (context_length,)
 
     embedding, scale = pipeline.embed(list(context))
     validate_tensor(
-        embedding, shape=(4, expected_embed_length, d_model), dtype=torch_dtype
+        embedding, shape=(4, expected_embed_length, d_model), dtype=model_dtype
     )
     validate_tensor(scale, shape=(4,), dtype=torch.float32)
 
     # input: tensor of shape (context_length,)
     embedding, scale = pipeline.embed(context[0, ...])
     validate_tensor(
-        embedding, shape=(1, expected_embed_length, d_model), dtype=torch_dtype
+        embedding, shape=(1, expected_embed_length, d_model), dtype=model_dtype
     )
     validate_tensor(scale, shape=(1,), dtype=torch.float32)