opencobra · ripaul · Jul 2, 2025 · Jul 3, 2025 · Jul 3, 2025 · Jul 4, 2025
diff --git a/setup.cfg b/setup.cfg
@@ -84,6 +84,8 @@ cobra =
 [options.extras_require]
 array = 
 	scipy
+chrr = 
+	hopsy
 development = 
 	black
 	bumpversion

diff --git a/src/cobra/sampling/__init__.py b/src/cobra/sampling/__init__.py
@@ -1,5 +1,11 @@
 from .hr_sampler import HRSampler, shared_np_array
 from .achr import ACHRSampler
 from .core import step
+
+from .hopsy import hopsy_is_available
+
+if hopsy_is_available:
+    from .hopsy import HopsySampler
+
 from .optgp import OptGPSampler
 from .sampling import sample
diff --git a/src/cobra/sampling/hopsy.py b/src/cobra/sampling/hopsy.py
@@ -0,0 +1,214 @@
+"""Provide hopsy samplers."""
+
+from typing import TYPE_CHECKING, Optional
+
+
+if TYPE_CHECKING:
+    from cobra import Model
+
+from ..core.configuration import Configuration
+from .hr_sampler import HRSampler
+
+
+__all__ = (
+    "HopsySampler",
+    "hopsy_is_available",
+)
+
+configuration = Configuration()
+
+try:
+    import hopsy
+    import numpy as np
+    import pandas as pd
+
+    hopsy_is_available = True
+
+    # for the time being while the updated hopsy version isn't released
+    def back_transform(problem, samples):
+        """
+        Transform samples from the sampling space to the original parameter space.
+
+        Parameters
+        ----------
+        problem : hopsy.Problem
+          A hopsy.Problem with populated `problem.transformation` and `problem.shift` of
+          shape `(m, d)` and `(d, )`, respectively. If not populated, the identity
+          operation is performed.
+        samples : np.array
+          Parameter samples of shape `(..., d)` which are to be transformed.
+
+        Returns
+        -------
+        np.array
+          Transformed samples of shape `(..., m)`
+        """
+        S, h = problem.transformation, problem.shift
+        transformed = samples @ S.T if S is not None else samples
+        shifted = transformed + h if h is not None else transformed
+        return shifted
+
+    class HopsySampler(HRSampler):
+        """
+        Generic sampling wrapper for hopsy samplers.
+
+        Parameters
+        ----------
+        model : cobra.Model
+            The cobra model from which to generate samples.
+        sampler : hopsy.Proposal, optional
+            The hopsy sampler for sampling the polytope
+            (default hopsy.UniformCoordinateHitAndRunProposal).
+        processes: int, optional
+            The number of processes used during sampling
+            (default cobra.Configuration.processes).
+        thinning : int, optional
+            The thinning factor of the generated sampling chain. A thinning of
+            10 means samples are returned every 10 steps (default 100).
+        nproj : int > 0, optional
+            How often to reproject the sampling point into the feasibility
+            space. Avoids numerical issues at the cost of lower sampling. If
+            you observe many equality constraint violations with
+            `sampler.validate` you should lower this number (default None).
+        rounding : bool, optional
+            Whether to precondition the sampling problem by applying an offline rounding
+            transformation.
+        seed : int > 0, optional
+            Sets the random number seed. Initialized to the current time stamp
+            if None (default None).
+
+        Attributes
+        ----------
+        n_samples : int
+            The total number of samples that have been generated by this
+            sampler instance.
+        problem : typing.NamedTuple
+            A NamedTuple whose attributes define the entire sampling problem in
+            matrix form.
+        fwd_idx : numpy.array
+            A numpy array having one entry for each reaction in the model,
+            containing the index of the respective forward variable.
+        rev_idx : numpy.array
+            A numpy array having one entry for each reaction in the model,
+            containing the index of the respective reverse variable.
+        sampler :
+            The hopsy sampler type.
+        mcs : list
+            List of length `processes` of hopsy.MarkovChain objects.
+        rngs : list
+            List of length `processes` of hopsy.RandomNumberGenerator objects.
+
+        """
+
+        def __init__(
+            self,
+            model: "Model",
+            sampler: hopsy.Proposal = hopsy.UniformCoordinateHitAndRunProposal,
+            thinning: int = 100,
+            processes: Optional[int] = None,
+            nproj: Optional[int] = None,
+            seed: Optional[int] = None,
+            rounding: bool = True,
+            **kwargs,
+        ) -> None:
+            """Initialize a new HopsySampler."""
+            super().__init__(model, thinning, nproj=nproj, seed=seed, **kwargs)
+
+            if self.problem.inequalities.shape[0] > 0:
+                A = self.problem.inequalities
+                b = self.problem.bounds
+            else:
+                # empty dummy constraint to set problem dimension
+                A = np.ones((0, self.problem.equalities.shape[1]))
+                b = np.ones(0)
+
+            problem = hopsy.Problem(A, b)
+            problem = hopsy.add_box_constraints(
+                problem,
+                self.problem.variable_bounds[0],
+                self.problem.variable_bounds[1],
+                simplify=False,
+            )
+            problem = hopsy.add_equality_constraints(
+                problem, self.problem.equalities, self.problem.b
+            )
+            problem = hopsy.round(problem, simplify=False) if rounding else problem
+
+            self.sampler = sampler
+
+            # batched transformation to better exploit vectorization
+            self.transformation = problem.transformation.copy()
+            self.shift = problem.shift.copy()
+
+            problem.transformation = None
+            problem.shift = None
+
+            self._problem = problem
+
+            if processes is None:
+                self.processes = configuration.processes
+            else:
+                self.processes = processes
+
+            self.mcs = None
+            self.rngs = None
+
+            self.mcs = [
+                hopsy.MarkovChain(self._problem, self.sampler) for i in range(processes)
+            ]
+            self.rngs = [
+                hopsy.RandomNumberGenerator(self._seed, i) for i in range(processes)
+            ]
+
+        def sample(self, n: int, fluxes: bool = True) -> pd.DataFrame:
+            """Generate a set of samples by calling hopsy.
+
+            Parameters
+            ----------
+            n : int
+                The minimum number of samples that are generated at once.
+            fluxes : bool, optional
+                Whether to return fluxes or the internal solver variables. If
+                set to False, will return a variable for each forward and
+                backward flux as well as all additional variables you might
+                have defined in the model (default True).
+
+            Returns
+            -------
+            pandas.DataFrame
+                Returns a pandas DataFrame with `n` rows, each containing a
+                flux sample.
+            """
+            _, samples = hopsy.sample(
+                self.mcs,
+                self.rngs,
+                n_samples=n // self.processes,
+                thinning=self.thinning,
+                n_procs=self.processes,
+            )
+
+            # batched transformation to better exploit vectorization
+            self._problem.transformation = self.transformation
+            self._problem.shift = self.shift
+
+            samples = back_transform(self._problem, samples)
+
+            self._problem.transformation = None
+            self._problem.shift = None
+
+            samples = samples.reshape(-1, samples.shape[-1])  # flatten chain dim
+
+            if fluxes:
+                names = [r.id for r in self.model.reactions]
+
+                return pd.DataFrame(
+                    samples[:, self.fwd_idx] - samples[:, self.rev_idx],
+                    columns=names,
+                )
+            else:
+                names = [v.name for v in self.model.variables]
+
+                return pd.DataFrame(samples, columns=names)
+
+except ModuleNotFoundError:
+    hopsy_is_available = False
diff --git a/src/cobra/sampling/sampling.py b/src/cobra/sampling/sampling.py
@@ -5,6 +5,14 @@
 import pandas as pd
 
 from .achr import ACHRSampler
+from .hopsy import hopsy_is_available
+
+
+if hopsy_is_available:
+    from .hopsy import HopsySampler
+else:
+    import warnings
+
 from .optgp import OptGPSampler
 
 
@@ -15,22 +23,30 @@
 def sample(
     model: "Model",
     n: int,
-    method: str = "optgp",
+    method: str = "chrr",
     thinning: int = 100,
     processes: int = 1,
     seed: Optional[int] = None,
 ) -> pd.DataFrame:
     """Sample valid flux distributions from a cobra model.
 
-    Currently, two methods are supported:
+    Currently, three methods are supported:
+
+    1. 'chrr' (default) which uses a the coordinate Hit-and-Run algorithm
+        with rounding ([1]_), which has been shown to often outperform OptGP
+        and ACHR, refer [2]_, [3]_. The rounding transformation is performed
+        in an offline manner which inflicts a certain base cost, however
+        sampling is performed using the C++-based library ``hopsy`` ([4]_)
+        and is, thus, blazingly fast. Moreover, this method supports
+        parallel sampling.
 
-    1. 'optgp' (default) which uses the OptGPSampler that supports parallel
+    2. 'optgp'  which uses the OptGPSampler that supports parallel
         sampling. Requires large numbers of samples to be performant
         (`n` > 1000). For smaller samples, 'achr' might be better suited.
-        For details, refer [1]_ .
+        For details, refer [5]_ .
 
-    2. 'achr' which uses artificial centering hit-and-run. This is a single
-       process method with good convergence. For details, refer [2]_ .
+    3. 'achr' which uses artificial centering hit-and-run. This is a single
+       process method with good convergence. For details, refer [6]_ .
 
     Parameters
     ----------
@@ -48,8 +64,8 @@ def sample(
         in benchmarks gives approximately uncorrelated samples. If set to 1
         will return all iterates (default 100).
     processes : int, optional
-        Only used for 'optgp'. The number of processes used to generate
-        samples (default 1).
+        Only used for 'optgp' and the hopsy samplers. The number of processes
+        used to generate samples (default 1).
     seed : int > 0, optional
         Sets the random number seed. Initialized to the current time stamp
         if None (default None).
@@ -69,28 +85,61 @@ def sample(
 
     References
     ----------
-    .. [1] Megchelenbrink W, Huynen M, Marchiori E (2014)
+    .. [1] Hulda S Haraldsdóttir, Ben Cousins, Ines Thiele, Ronan M.T Fleming,
+       Santosh Vempala,
+       CHRR: coordinate hit-and-run with rounding for uniform sampling of
+       constraint-based models,
+       Bioinformatics, Volume 33, Issue 11, June 2017, Pages 1741–1743,
+       https://doi.org/10.1093/bioinformatics/btx052
+
+    .. [2] Herrmann, H.A., Dyson, B.C., Vass, L. et al.
+       Flux sampling is a powerful tool to study metabolism under changing
+       environmental conditions.
+       npj Syst Biol Appl 5, 32 (2019).
+       https://doi.org/10.1038/s41540-019-0109-0
+
+    .. [3] Fallahi S, Skaug HJ, Alendal G (2020)
+       A comparison of Monte Carlo sampling methods for
+       metabolic network models.
+       PLoS ONE 15(7): e0235393.
+       https://doi.org/10.1371/journal.pone.0235393
+
+    .. [4] Richard D Paul, Johann F Jadebeck, Anton Stratmann, Wolfgang Wiechert,
+       Katharina Nöh,
+       hopsy — a methods marketplace for convex polytope sampling in Python,
+       Bioinformatics, Volume 40, Issue 7, July 2024, btae430,
+       https://doi.org/10.1093/bioinformatics/btae430
+
+    .. [5] Megchelenbrink W, Huynen M, Marchiori E (2014)
        optGpSampler: An Improved Tool for Uniformly Sampling the Solution-Space
        of Genome-Scale Metabolic Networks.
        PLoS ONE 9(2): e86587.
        https://doi.org/10.1371/journal.pone.0086587
 
-    .. [2] Direction Choice for Accelerated Convergence in Hit-and-Run Sampling
+    .. [6] Direction Choice for Accelerated Convergence in Hit-and-Run Sampling
        David E. Kaufman, Robert L. Smith
        Operations Research 199846:1 , 84-95
        https://doi.org/10.1287/opre.46.1.84
 
     """
+    if not hopsy_is_available and method == "chrr":
+        method = "optgp"
+        warnings.warn(
+            "hopsy and thus chrr are not available, switching to optgp.", stacklevel=2
+        )
+
     if method == "optgp":
         sampler = OptGPSampler(model, processes=processes, thinning=thinning, seed=seed)
     elif method == "achr":
         sampler = ACHRSampler(model, thinning=thinning, seed=seed)
+    elif method == "chrr":
+        sampler = HopsySampler(
+            model, processes=processes, thinning=thinning, seed=seed, rounding=True
+        )
     else:
         raise ValueError(
             f'Invalid value: "{method}" for method used. '
-            'The value must be "optgp" or "achr".'
+            'The value must be "optgp", "achr" or "chrr".'
         )
 
-    return pd.DataFrame(
-        columns=[rxn.id for rxn in model.reactions], data=sampler.sample(n)
-    )
+    return sampler.sample(n)
-Original file line number
+Diff line change
@@ Expand Up / @@ -84,6 +84,8 @@ cobra = @@
     [options.extras_require]
     array =
     	scipy
+    chrr =
+    	hopsy
     development =
     	black
     	bumpversion
@@ Expand Down @@