
Commit 790c70b

Replace enums with string literals
1 parent 36bed5b commit 790c70b

10 files changed (+48, -65 lines)
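The change applied across all ten files is the same: each small Enum is replaced by a PEP 695 `type` alias over `typing.Literal`, so callers pass plain strings while static type checkers still restrict the accepted values. A minimal, self-contained sketch of the pattern (the names here are illustrative, not part of fair_forge):

from typing import Literal

# PEP 695 type alias (Python 3.12+): type checkers narrow `kind` to these strings.
type Kind = Literal["basic", "proportional"]


def describe(kind: Kind) -> str:
    match kind:
        case "basic":
            return "basic split"
        case "proportional":
            return "proportional split"
        case _:
            # Unreachable for well-typed callers, but protects untyped call sites,
            # mirroring the defensive `case _` branches added in this commit.
            raise ValueError(f"Invalid kind: {kind}")


print(describe("basic"))   # OK
# describe("Basic")        # rejected by mypy/pyright; raises ValueError at runtime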

fair_forge/datasets.py

Lines changed: 7 additions & 8 deletions
@@ -1,6 +1,5 @@
-from enum import Enum
 from pathlib import Path
-from typing import NamedTuple, Protocol
+from typing import Literal, NamedTuple, Protocol
 
 import numpy as np
 from numpy.typing import NDArray
@@ -46,9 +45,7 @@ class GroupDataset(NamedTuple):
     feature_names: list[str]
 
 
-class AdultGroup(Enum):
-    SEX = "Sex"
-    RACE = "Race"
+type AdultGroup = Literal["Sex", "Race"]
 
 
 def load_adult(
@@ -66,7 +63,7 @@ def load_adult(
     Returns:
         A Dataset object containing the Adult dataset.
     """
-    name = f"Adult {group.value}"
+    name = f"Adult {group}"
     if binarize_nationality:
         name += ", binary nationality"
     if binarize_race:
@@ -116,16 +113,18 @@ def load_adult(
     groups: NDArray[np.int32]
     to_drop: str
     match group:
-        case AdultGroup.SEX:
+        case "Sex":
            groups = (
                df.get_column("sex").cat.starts_with("Male").cast(pl.Int32).to_numpy()
            )
            to_drop = "sex"
-        case AdultGroup.RACE:
+        case "Race":
            # `.to_physical()` converts the categorical column to its physical representation,
            # which is UInt32 by default in Polars.
            groups = df.get_column("race").to_physical().cast(pl.Int32).to_numpy()
            to_drop = "race"
+        case _:
+            raise ValueError(f"Invalid group: {group}")
     if not group_in_features:
         df = df.drop(to_drop)
         column_grouping_prefixes.remove(to_drop)
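With `AdultGroup` now a `Literal["Sex", "Race"]` alias, callers pass the string directly and the `.value` unwrapping in the dataset name disappears. A hedged usage sketch, reusing the `ff` import alias and the keyword arguments visible in tests/test_datasets.py below:

import fair_forge as ff

# "Sex" and "Race" are the only values the AdultGroup alias admits.
data = ff.load_adult(
    group="Race",
    group_in_features=False,
    binarize_nationality=False,
    binarize_race=False,
)

# A misspelling such as group="race" is flagged by the type checker and, via the
# new `case _` branch, raises ValueError("Invalid group: race") at runtime.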

fair_forge/eval.py

Lines changed: 10 additions & 11 deletions
@@ -1,6 +1,5 @@
 from collections.abc import Mapping, Sequence
-from enum import Enum
-from typing import Any, cast
+from typing import Any, Literal, cast
 
 import polars as pl
 
@@ -12,12 +11,7 @@
 
 __all__ = ["Split", "evaluate"]
 
-
-class Split(Enum):
-    """Enum for different split methods used in evaluation."""
-
-    BASIC = "basic"
-    PROPORTIONAL = "proportional"
+type Split = Literal["basic", "proportional"] | SplitMethod
 
 
 def evaluate(
@@ -28,7 +22,7 @@ def evaluate(
     *,
     preprocessor: Preprocessor | None = None,
     repeat: int = 1,
-    split: Split | SplitMethod = Split.PROPORTIONAL,
+    split: Split = "proportional",
     seed: int = 42,
     train_percentage: float = 0.8,
     remove_score_suffix: bool = True,
@@ -41,10 +35,15 @@
         split_seed = seed + repeat_index
         split_method: SplitMethod
         match split:
-            case Split.BASIC:
+            case "basic":
                split_method = basic_split
-            case Split.PROPORTIONAL:
+            case "proportional":
                split_method = proportional_split
+            case str() as split_method:
+                raise ValueError(
+                    f"Invalid split method: {split_method}. "
+                    "Use 'basic', 'proportional', or a custom SplitMethod instance."
+                )
            case _:
                split_method = split
        train_idx, test_idx = split_method(
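`Split` now covers both the built-in names and any custom `SplitMethod`, and the new `case str()` branch turns an unknown string into an immediate error instead of silently treating it as a callable. A sketch of a custom splitter; the parameter list below is an assumption (only the forwarding of `seed` and `train_percentage` is visible in this hunk), so check the `SplitMethod` protocol before copying:

import numpy as np
from numpy.typing import NDArray


# Hypothetical splitter matching the SplitMethod protocol: it is passed through
# unchanged by the final `case _` branch of the match above.
def every_other_split(
    groups: NDArray[np.int32],
    y: NDArray[np.int32],
    train_percentage: float,
    seed: int,
) -> tuple[NDArray[np.intp], NDArray[np.intp]]:
    idx = np.arange(len(y))
    return idx[::2], idx[1::2]  # (train indices, test indices)


# evaluate(..., split="basic")            -> basic_split
# evaluate(..., split="proportional")     -> proportional_split
# evaluate(..., split=every_other_split)  -> used as-is
# evaluate(..., split="stratified")       -> ValueError: Invalid split method: stratified. ...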

fair_forge/methods.py

Lines changed: 2 additions & 9 deletions
@@ -1,8 +1,7 @@
 """Protocols and implementations of methods for fairness-aware machine learning."""
 
 from dataclasses import asdict, dataclass
-from enum import Enum
-from typing import Any, Protocol, Self
+from typing import Any, Literal, Protocol, Self
 
 import numpy as np
 from numpy.typing import NDArray
@@ -49,13 +48,7 @@ def fit(
     ) -> Self: ...
 
 
-class FairnessType(Enum):
-    DP = "dp"
-    """Demographic Parity (DP)"""
-    EQ_OPP = "eq_opp"
-    """Equal Opportunity (EQ_OPP)"""
-    EQ_ODDS = "eq_odds"
-    """Equalized Odds (EQ_ODDS)"""
+type FairnessType = Literal["dp", "eq_opp", "eq_odds"]
 
 
 @dataclass
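A practical side effect of dropping the Enum, visible in `f"Adult {group}"` and `f"renyi_{self.base}"` elsewhere in this commit: the literal strings flow straight into f-strings, dicts, and parameter grids with no `.value` unwrapping. A small illustrative sketch (the `Config` class is hypothetical, not a fair_forge class; the alias is repeated locally to keep it self-contained):

from dataclasses import asdict, dataclass
from typing import Literal

type FairnessType = Literal["dp", "eq_opp", "eq_odds"]


@dataclass
class Config:  # illustrative stand-in for a method's hyperparameter dataclass
    fairness: FairnessType = "dp"


# Plain strings serialize as-is; no Enum round-tripping is needed.
assert asdict(Config()) == {"fairness": "dp"}
assert f"fairness={Config().fairness}" == "fairness=dp"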

fair_forge/metrics.py

Lines changed: 10 additions & 14 deletions
@@ -1,7 +1,7 @@
 from collections.abc import Callable, Sequence
 from dataclasses import dataclass
-from enum import Enum, Flag, auto
-from typing import Protocol, override
+from enum import Flag, auto
+from typing import Literal, Protocol, override
 
 import numpy as np
 from numpy.typing import NDArray
@@ -57,11 +57,7 @@ def __call__(
     ) -> Float: ...
 
 
-class LabelType(Enum):
-    """The variable that is compared to the predictions in order to check how similar they are."""
-
-    S = "s"
-    Y = "y"
+type LabelType = Literal["group", "y"]
 
 
 @dataclass
@@ -72,12 +68,12 @@ class RenyiCorrelation(GroupMetric):
     titled "On Measures of Dependence" by Alfréd Rényi.
     """
 
-    base: LabelType = LabelType.S
+    base: LabelType = "group"
 
     @property
     def __name__(self) -> str:
        """The name of the metric."""
-        return f"renyi_{self.base.value}"
+        return f"renyi_{self.base}"
 
     @override
     def __call__(
@@ -287,7 +283,7 @@ def as_group_metric(
     """Turn a sequence of metrics into a list of group metrics."""
     metrics = []
     for metric in base_metrics:
-        if agg & MetricAgg.DIFF:
+        if MetricAgg.DIFF in agg:
            metrics.append(
                _BinaryAggMetric(
                    metric=metric,
@@ -296,7 +292,7 @@ def as_group_metric(
                    aggregator=lambda i, j: j - i,
                )
            )
-        if agg & MetricAgg.RATIO:
+        if MetricAgg.RATIO in agg:
            metrics.append(
                _BinaryAggMetric(
                    metric=metric,
@@ -305,7 +301,7 @@
                    aggregator=lambda i, j: i / j if j != 0 else np.float64(np.nan),
                )
            )
-        if agg & MetricAgg.MIN:
+        if MetricAgg.MIN in agg:
            metrics.append(
                _MulticlassAggMetric(
                    metric=metric,
@@ -314,7 +310,7 @@
                    aggregator=np.min,
                )
            )
-        if agg & MetricAgg.MAX:
+        if MetricAgg.MAX in agg:
            metrics.append(
                _MulticlassAggMetric(
                    metric=metric,
@@ -323,7 +319,7 @@
                    aggregator=np.max,
                )
            )
-        if agg & MetricAgg.INDIVIDUAL:
+        if MetricAgg.INDIVIDUAL in agg:
            metrics.append(
                _BinaryAggMetric(
                    metric=metric,
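Separately from the Enum removal, the `agg & MetricAgg.X` checks become membership tests. For single-bit members the two spellings agree; `in` is simply the clearer idiom, and for compound flags it acts as a subset test rather than an any-overlap test. A standalone sketch with a stand-in Flag (the real MetricAgg definition is not shown in this diff); Python 3.12+ semantics, which the `type` aliases in this commit already require:

from enum import Flag, auto


class Agg(Flag):  # illustrative stand-in for MetricAgg
    DIFF = auto()
    RATIO = auto()
    MIN = auto()


selected = Agg.DIFF | Agg.MIN

# Single members: `&` truthiness and `in` agree.
assert (Agg.DIFF in selected) == bool(selected & Agg.DIFF)    # both True
assert (Agg.RATIO in selected) == bool(selected & Agg.RATIO)  # both False

# Compound flags differ: `&` is truthy on any overlap, `in` requires every bit.
combo = Agg.DIFF | Agg.RATIO
assert bool(selected & combo) is True   # overlaps on DIFF
assert (combo in selected) is False     # RATIO bit is missing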

fair_forge/nn/beutel.py

Lines changed: 6 additions & 4 deletions
@@ -154,7 +154,7 @@ class Beutel(BaseEstimator, GroupBasedTransform):
     adv_size: list[int] = field(default_factory=lambda: [40])
     pred_size: list[int] = field(default_factory=lambda: [40])
     adv_weight: float = 1.0
-    fairness: FairnessType = FairnessType.DP
+    fairness: FairnessType = "dp"
     batch_size: int = 64
     iters: int = 500
     random_state: int = 42
@@ -182,12 +182,14 @@ def loss_fn(model: Model, x: Array, y: Array, s: Array) -> Array:
            ).mean()
 
            match self.fairness:
-                case FairnessType.EQ_OPP:
+                case "eq_opp":
                    mask = y > 0.5
-                case FairnessType.EQ_ODDS:
+                case "eq_odds":
                    raise NotImplementedError("Not implemented Eq. Odds yet")
-                case FairnessType.DP:
+                case "dp":
                    mask = jnp.ones(s.shape, dtype=jnp.bool)
+                case _:
+                    raise ValueError("Invalid fairness value")
            if s_size > 1:
                adversary_loss = optax.softmax_cross_entropy_with_integer_labels(
                    logits=s_hat, labels=s, where=mask
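This commit guards the string-based match with a runtime `case _: raise ValueError(...)`. Because `FairnessType` is now a closed `Literal`, the same guarantee can alternatively be surfaced at type-check time with `typing.assert_never`; the sketch below shows that alternative (it is not what the commit does, and the alias is repeated locally to keep the example self-contained):

from typing import Literal, assert_never

type FairnessType = Literal["dp", "eq_opp", "eq_odds"]


def describe(fairness: FairnessType) -> str:
    match fairness:
        case "dp":
            return "demographic parity"
        case "eq_opp":
            return "equal opportunity"
        case "eq_odds":
            return "equalized odds"
        case unreachable:
            # mypy/pyright report an error here if a new literal is added to
            # FairnessType but not handled above; at runtime this still raises
            # for untyped callers that pass an unexpected value.
            assert_never(unreachable)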

fair_forge/preprocessing/group_pre_method.py

Lines changed: 4 additions & 10 deletions
@@ -1,7 +1,6 @@
 from dataclasses import dataclass
-from enum import Enum
 import itertools
-from typing import Any, Self
+from typing import Any, Literal, Self
 
 import numpy as np
 from numpy.typing import NDArray
@@ -54,17 +53,12 @@ def set_params(self, **params: Any) -> Self:
         return ret
 
 
-class UpsampleStrategy(Enum):
-    """Strategy for upsampling."""
-
-    UNIFORM = "uniform"
-    # PREFERENTIAL = "preferential"
-    NAIVE = "naive"
+type UpsampleStrategy = Literal["uniform", "naive"]  # , "preferential"]
 
 
 @dataclass
 class Upsampler(BaseEstimator, GroupDatasetModifier):
-    strategy: UpsampleStrategy = UpsampleStrategy.UNIFORM
+    strategy: UpsampleStrategy = "uniform"
     random_state: int = 0
 
     def fit(
@@ -89,7 +83,7 @@ def fit(
         vals = list([d[1] for d in data])
 
         for mask, length, y_eq_y, s_eq_s in data:
-            if self.strategy is UpsampleStrategy.NAIVE:
+            if self.strategy == "naive":
                percentages.append((mask, (np.max(vals) / length).astype(np.float64)))
            else:
                num_samples = len(y)
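Note the comparison change in the last hunk: Enum members are conventionally compared with `is`, but plain strings must be compared with `==`, since identity of equal strings is an interning detail. A two-line illustration (not fair_forge code):

# Equality is reliable for strings; identity is not.
strategy = "".join(["na", "ive"])   # built at runtime, not a cached literal
assert strategy == "naive"          # always True
# `strategy is "naive"` may be False and triggers a SyntaxWarning in CPython.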

tests/test_datasets.py

Lines changed: 4 additions & 4 deletions
@@ -5,7 +5,7 @@
 
 def test_adult_gender():
     data = ff.load_adult(
-        group=ff.AdultGroup.SEX,
+        group="Sex",
         group_in_features=False,
         binarize_nationality=False,
         binarize_race=False,
@@ -32,7 +32,7 @@ def test_adult_gender():
 
 def test_adult_race():
     data = ff.load_adult(
-        group=ff.AdultGroup.RACE,
+        group="Race",
         group_in_features=False,
         binarize_nationality=False,
         binarize_race=False,
@@ -56,7 +56,7 @@ def test_adult_race():
 
 def test_adult_race_binary():
     data = ff.load_adult(
-        group=ff.AdultGroup.RACE,
+        group="Race",
         group_in_features=False,
         binarize_nationality=True,
         binarize_race=True,
@@ -80,7 +80,7 @@ def test_adult_race_binary():
 
 def test_adult_gender_in_features():
     data = ff.load_adult(
-        group=ff.AdultGroup.SEX,
+        group="Sex",
         group_in_features=True,
         binarize_nationality=True,
         binarize_race=False,

tests/test_eval.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ def test_pipeline_with_dummy():
         metrics=metrics,
         group_metrics=group_metrics,
         repeat=2,
-        split=ff.Split.BASIC,
+        split="basic",
         seed=42,
         train_percentage=0.8,
         remove_score_suffix=True,

tests/test_metrics.py

Lines changed: 3 additions & 3 deletions
@@ -9,15 +9,15 @@ def test_renyi():
     y_pred = np.array([1, 0, 1, 0, 0, 1], dtype=np.int32)
     groups = np.array([1, 0, 1, 0, 0, 1], dtype=np.int32)
 
-    renyi_y = ff.RenyiCorrelation(ff.LabelType.Y)
+    renyi_y = ff.RenyiCorrelation("y")
     result = renyi_y(y_true=y_true, y_pred=y_pred, groups=groups)
     np.testing.assert_allclose(result, 1 / 3)
     assert renyi_y.__name__ == "renyi_y"
 
-    renyi_s = ff.RenyiCorrelation(ff.LabelType.S)
+    renyi_s = ff.RenyiCorrelation("group")
     result = renyi_s(y_true=y_true, y_pred=y_pred, groups=groups)
     np.testing.assert_allclose(result, 1.0)
-    assert renyi_s.__name__ == "renyi_s"
+    assert renyi_s.__name__ == "renyi_group"
 
 
 def test_prob_pos():

tests/test_pre_methods.py

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ def test_upsampler():
     y = np.array([0, 0, 1, 1, 1], dtype=np.int32)
     groups = np.array([0, 1, 0, 1, 1], dtype=np.int32)
     lr = LogisticRegression(random_state=41, max_iter=10)
-    upsampler = ff.Upsampler(strategy=ff.UpsampleStrategy.NAIVE, random_state=41)
+    upsampler = ff.Upsampler(strategy="naive", random_state=41)
     pipeline = ff.GroupPipeline(group_data_modifier=upsampler, estimator=lr)
     pipeline.set_params(random_state=42)
     assert pipeline.get_params()["estimator__random_state"] == 42
