meta-pytorch
diff --git a/‎botorch_community/acquisition/discretized.py‎
Lines changed: 57 additions & 5 deletions b/‎botorch_community/acquisition/discretized.py‎
Lines changed: 57 additions & 5 deletions
diff --git a/‎botorch_community/acquisition/input_constructors.py‎
Lines changed: 28 additions & 0 deletions b/‎botorch_community/acquisition/input_constructors.py‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎botorch_community/models/prior_fitted_network.py‎
Lines changed: 119 additions & 9 deletions b/‎botorch_community/models/prior_fitted_network.py‎
Lines changed: 119 additions & 9 deletions
@@ -39,7 +39,12 @@ class DiscretizedAcquistionFunction(AcquisitionFunction, ABC):
     be implemented by subclasses to define the specific acquisition functions.
     """
 
-    def __init__(self, model: Model, posterior_transform: PosteriorTransform) -> None:
+    def __init__(
+        self,
+        model: Model,
+        posterior_transform: PosteriorTransform,
+        assume_symmetric_posterior: bool = True,
+    ) -> None:
         r"""
         Initialize the DiscretizedAcquistionFunction
 
@@ -49,8 +54,17 @@ def __init__(self, model: Model, posterior_transform: PosteriorTransform) -> Non
                 The model should be a `PFNModel`.
             posterior_transform: A ScalarizedPosteriorTransform that can only
                 indicate minimization or maximization of the objective.
+            assume_symmetric_posterior: Only relevant when minimizing the
+                objective. If True, we simply negate the train ys before
+                querying the model. Else, we query the model on the original
+                train ys and use a proper posterior transform, i.e. we mirror
+                the resulting posterior. The latter cannot be done generally,
+                as some models only support maximization. "Symmetric" does
+                not mean that the posterior distribution for a particular
+                set is symmetric, but that one can negate the ys of the
+                context and get out negated ys.
         """
         super().__init__(model=model)
+        self.set_X_pending(None)
+        self.assume_symmetric_posterior = assume_symmetric_posterior
         self.maximize = True
         if posterior_transform is not None:
             unsupported_error_message = (
@@ -80,8 +94,12 @@ def forward(self, X: Tensor) -> Tensor:
             A `(b)`-dim Tensor of the acquisition function at the given
             design points `X`.
         """
-        discrete_posterior = self.model.posterior(X)
-        if not self.maximize:
+        discrete_posterior = self.model.posterior(
+            X,
+            pending_X=self.X_pending,
+            negate_train_ys=(not self.maximize) and self.assume_symmetric_posterior,
+        )
+        if not self.maximize and not self.assume_symmetric_posterior:
             discrete_posterior.borders = -torch.flip(discrete_posterior.borders, [0])
             discrete_posterior.probabilities = torch.flip(
                 discrete_posterior.probabilities, [-1]
@@ -124,6 +142,7 @@ def __init__(
         model: Model,
         best_f: Tensor,
         posterior_transform: PosteriorTransform | None = None,
+        assume_symmetric_posterior: bool = True,
     ) -> None:
         r"""
         Initialize the DiscretizedExpectedImprovement
@@ -134,7 +153,11 @@ def __init__(
                 The model should be a `PFNModel`.
             best_f: A tensor representing the current best observed value.
         """
-        super().__init__(model=model, posterior_transform=posterior_transform)
+        super().__init__(
+            model=model,
+            posterior_transform=posterior_transform,
+            assume_symmetric_posterior=assume_symmetric_posterior,
+        )
         self.register_buffer("best_f", torch.as_tensor(best_f))
 
     def ag_integrate(self, lower_bound: Tensor, upper_bound: Tensor) -> Tensor:
@@ -187,6 +210,30 @@ def ag_integrate(self, lower_bound: Tensor, upper_bound: Tensor) -> Tensor:
         return result.clamp_min(0)
 
 
+class DiscretizedNoisyExpectedImprovement(DiscretizedExpectedImprovement):
+    r"""Discretized expected improvement with a fixed reference value of zero.
+
+    Unlike `DiscretizedExpectedImprovement`, this class does not take `best_f`
+    from the caller: it always passes `best_f=0.0` to its parent. It also
+    registers `X_pending` at construction time.
+
+    Only works with models trained specifically for this.
+    """
+
+    def __init__(
+        self,
+        model: Model,
+        posterior_transform: PosteriorTransform | None = None,
+        X_pending: Tensor | None = None,
+        assume_symmetric_posterior: bool = True,
+    ) -> None:
+        r"""
+        Initialize the DiscretizedNoisyExpectedImprovement
+
+        Args:
+            model: A fitted model that is used to compute the posterior
+                distribution over the outcomes of interest.
+                The model should be a `PFNModel`.
+            posterior_transform: Passed through unchanged to
+                `DiscretizedExpectedImprovement`.
+            X_pending: Points already tried, but not yet included in the
+                training data. Stored via `set_X_pending`.
+            assume_symmetric_posterior: Passed through unchanged to
+                `DiscretizedExpectedImprovement`.
+        """
+        super().__init__(
+            model=model,
+            # NOTE(review): the reference value is hard-coded to zero;
+            # presumably the model is trained so that improvement is
+            # measured against zero -- confirm.
+            best_f=0.0,
+            posterior_transform=posterior_transform,
+            assume_symmetric_posterior=assume_symmetric_posterior,
+        )
+        # The base initializer resets the pending points to None, so they
+        # have to be set after it has run.
+        self.set_X_pending(X_pending)
+
+
 class DiscretizedProbabilityOfImprovement(DiscretizedAcquistionFunction):
     r"""DiscretizedProbabilityOfImprovement is an acquisition function that
     computes the probability of improvement over the current best observed value
@@ -198,6 +245,7 @@ def __init__(
         model: Model,
         best_f: Tensor,
         posterior_transform: PosteriorTransform | None = None,
+        assume_symmetric_posterior: bool = True,
     ) -> None:
         r"""
         Initialize the DiscretizedProbabilityOfImprovement
@@ -209,7 +257,11 @@ def __init__(
             best_f: A tensor representing the current best observed value.
         """
 
-        super().__init__(model, posterior_transform)
+        super().__init__(
+            model,
+            posterior_transform,
+            assume_symmetric_posterior=assume_symmetric_posterior,
+        )
         self.register_buffer("best_f", torch.as_tensor(best_f))
 
     def ag_integrate(self, lower_bound: Tensor, upper_bound: Tensor) -> Tensor:
 
@@ -37,6 +37,7 @@
 
 from botorch_community.acquisition.discretized import (
     DiscretizedExpectedImprovement,
+    DiscretizedNoisyExpectedImprovement,
     DiscretizedProbabilityOfImprovement,
 )
 from botorch_community.acquisition.scorebo import qSelfCorrectingBayesianOptimization
@@ -78,6 +79,33 @@ def construct_inputs_best_f(
     }
 
 
+@acqf_input_constructor(DiscretizedNoisyExpectedImprovement)
+def construct_inputs_noisy(
+    model: Model,
+    posterior_transform: PosteriorTransform | None = None,
+    X_pending: Optional[Tensor] = None,
+) -> dict[str, Any]:
+    r"""Construct kwargs for `DiscretizedNoisyExpectedImprovement`.
+
+    The arguments are passed through unchanged under their own names.
+
+    Args:
+        model: The model to be used in the acquisition function.
+        posterior_transform: The posterior transform to be used in the
+            acquisition function.
+        X_pending: Points already tried, but not yet included in the
+            training data.
+
+    Returns:
+        A dict mapping kwarg names of the constructor to values.
+    """
+    constructor_kwargs: dict[str, Any] = {}
+    constructor_kwargs["model"] = model
+    constructor_kwargs["posterior_transform"] = posterior_transform
+    constructor_kwargs["X_pending"] = X_pending
+    return constructor_kwargs
+
+
 @acqf_input_constructor(
     qBayesianQueryByComittee,
     qBayesianVarianceReduction,
 
@@ -187,6 +187,7 @@ def posterior(
         output_indices: Optional[list[int]] = None,
         observation_noise: Union[bool, Tensor] = False,
         posterior_transform: Optional[PosteriorTransform] = None,
+        negate_train_ys: bool = False,
     ) -> BoundedRiemannPosterior:
         r"""Computes the posterior over model outputs at the provided points.
 
@@ -200,6 +201,8 @@ def posterior(
             output_indices: **Currently not supported for PFNModel.**
             observation_noise: **Currently not supported for PFNModel**.
             posterior_transform: **Currently not supported for PFNModel**.
+            negate_train_ys: Whether to negate the training Ys. This is useful
+                for minimization.
 
         Returns:
             A `BoundedRiemannPosterior`, representing a batch of b? x q?`
@@ -218,17 +221,14 @@ def posterior(
         if posterior_transform is not None:
             raise UnsupportedError("posterior_transform is not supported for PFNModel.")
 
-        X, train_X, train_Y, orig_X_shape = self._prepare_data(X)
+        X, train_X, train_Y, orig_X_shape = self._prepare_data(
+            X, negate_train_ys=negate_train_ys
+        )
 
-        styles = get_styles(
-            model=self.pfn,
+        styles = self._get_styles(
             hps=self.style_hyperparameters,
             batch_size=X.shape[0],
-            device=X.device,
         )
-        if self.style is not None:
-            assert styles == {}, "Cannot provide both style and style_hyperparameters."
-            styles["style"] = self.style[None].repeat(X.shape[0], 1, 1).to(X.device)
 
         probabilities = self.pfn_predict(
             X=X,
@@ -246,7 +246,9 @@ def posterior(
             probabilities=probabilities,
         )
 
-    def _prepare_data(self, X: Tensor) -> tuple[Tensor, Tensor, Tensor, torch.Size]:
+    def _prepare_data(
+        self, X: Tensor, negate_train_ys: bool = False
+    ) -> tuple[Tensor, Tensor, Tensor, torch.Size]:
         orig_X_shape = X.shape  # X has shape b? x q? x d
         if len(X.shape) > 3:
             raise UnsupportedError(f"X must be at most 3-d, got {X.shape}.")
@@ -256,9 +258,29 @@ def _prepare_data(self, X: Tensor) -> tuple[Tensor, Tensor, Tensor, torch.Size]:
         X = self.transform_inputs(X)  # shape (b , q, d)
 
         train_X = match_batch_shape(self.transformed_X, X)  # shape (b, n, d)
-        train_Y = match_batch_shape(self.train_Y, X)  # shape (b, n, 1)
+        if negate_train_ys:
+            assert self.train_Y.mean().abs() < 1e-4, "train_Y must be zero-centered."
+        train_Y = match_batch_shape(
+            -self.train_Y if negate_train_ys else self.train_Y, X
+        )  # shape (b, n, 1)
         return X, train_X, train_Y, orig_X_shape
 
+    def _get_styles(self, hps, batch_size) -> dict[str, Tensor]:
+        """Build the style keyword arguments for the PFN.
+
+        Args:
+            hps: Style hyperparameters, handed to `get_styles` as is.
+            batch_size: Batch size handed to `get_styles`; also the number
+                of times `self.style` is repeated along a new leading
+                dimension.
+
+        Returns:
+            The dict returned by `get_styles`. If `self.style` is set, that
+            dict must be empty and gains a `"style"` entry holding the
+            repeated style on the device of `self.train_X`.
+        """
+        target_device = self.train_X.device
+        styles = get_styles(
+            model=self.pfn,
+            hps=hps,
+            batch_size=batch_size,
+            device=target_device,
+        )
+        if self.style is None:
+            return styles
+
+        # An explicit style and style hyperparameters are mutually exclusive.
+        assert styles == {}, "Cannot provide both style and style_hyperparameters."
+        repeated_style = self.style[None].repeat(batch_size, 1, 1)
+        styles["style"] = repeated_style.to(target_device)
+        return styles
+
     def pfn_predict(
         self,
         X: Tensor,
@@ -277,6 +299,7 @@ def pfn_predict(
 
         Returns: probabilities (b, q, num_buckets) for Riemann posterior.
         """
+
         if not self.batch_first:
             X = X.transpose(0, 1)  # shape (q, b, d)
             train_X = train_X.transpose(0, 1)  # shape (n, b, d)
@@ -300,6 +323,93 @@ def borders(self):
         return self.pfn.criterion.borders.to(self.train_X.dtype)
 
 
+class PFNModelWithPendingPoints(PFNModel):
+    r"""A `PFNModel` whose posterior can also condition on pending points.
+
+    Pending points are appended to the training inputs, and NaN is appended
+    as the corresponding training target.
+    """
+
+    def posterior(
+        self,
+        X: Tensor,
+        output_indices: Optional[list[int]] = None,
+        observation_noise: Union[bool, Tensor] = False,
+        posterior_transform: Optional[PosteriorTransform] = None,
+        pending_X: Optional[Tensor] = None,
+        negate_train_ys: bool = False,
+    ) -> BoundedRiemannPosterior:
+        r"""Computes the posterior over model outputs at the provided points.
+
+        Note: The input transforms should be applied here using
+            `self.transform_inputs(X)` after the `self.eval()` call and before
+            any `model.forward` or `model.likelihood` calls.
+
+        Args:
+            X: A `b? x q? x d`-dim Tensor, where `d` is the dimension of the
+                feature space.
+            output_indices: **Currently not supported for PFNModel.**
+            observation_noise: **Currently not supported for PFNModel**.
+            posterior_transform: **Currently not supported for PFNModel**.
+            pending_X: A tensor of shape `n'' x d`, where `n''` is the number
+                of pending points, which are to be observed but the value is
+                not yet known. It is given in the same (untransformed) space
+                as `X`.
+            negate_train_ys: Whether to negate the training Ys. This is useful
+                for minimization.
+
+        Returns:
+            A `BoundedRiemannPosterior`, representing a batch of `b? x q?`
+            distributions.
+
+        Raises:
+            UnsupportedError: If `output_indices` or `posterior_transform`
+                is not None.
+            AssertionError: If `pending_X` is given and is not 2-dimensional.
+        """
+        self.pfn.eval()
+        if output_indices is not None:
+            raise UnsupportedError(
+                "output_indices is not None. PFNModel should not "
+                "be a multi-output model."
+            )
+        if observation_noise:
+            logger.warning(
+                "observation_noise is not supported for PFNModel and is being ignored."
+            )
+        if posterior_transform is not None:
+            raise UnsupportedError("posterior_transform is not supported for PFNModel.")
+
+        X, train_X, train_Y, orig_X_shape = self._prepare_data(
+            X, negate_train_ys=negate_train_ys
+        )
+
+        styles = self._get_styles(
+            hps=self.style_hyperparameters,
+            batch_size=X.shape[0],
+        )
+
+        if pending_X is not None:
+            assert pending_X.dim() == 2, "pending_X must be 2-dimensional."
+            # `X` and `train_X` have been through the input transforms; do
+            # the same for the pending points so that every input handed to
+            # the PFN lives in the same space, dtype and device.
+            pending_X = self.transform_inputs(pending_X).to(train_X)
+            pending_X = pending_X.unsqueeze(0).expand(
+                X.shape[0], -1, -1
+            )  # shape (b, n', d)
+            train_X = torch.cat([train_X, pending_X], dim=1)  # shape (b, n+n', d)
+            # Pending points have no observed value yet: NaN is used as the
+            # target. `new_full` keeps the dtype and device of `train_Y`.
+            pending_Y = train_Y.new_full(
+                (train_Y.shape[0], pending_X.shape[1], 1), torch.nan
+            )  # shape (b, n', 1)
+            train_Y = torch.cat([train_Y, pending_Y], dim=1)  # shape (b, n+n', 1)
+
+        probabilities = self.pfn_predict(
+            X=X,
+            train_X=train_X,
+            train_Y=train_Y,
+            **self.constant_model_kwargs,
+            **styles,
+        )  # (b, q, num_buckets)
+        probabilities = probabilities.view(
+            *orig_X_shape[:-1], -1
+        )  # (b?, q?, num_buckets)
+
+        return BoundedRiemannPosterior(
+            borders=self.borders,
+            probabilities=probabilities,
+        )
+
+
 class MultivariatePFNModel(PFNModel):
     """A multivariate PFN model that returns a joint posterior over q batch inputs.