Commit 2f63781

Merge branch 'main' of github.com:data-simply/pyretailscience into feature/rfm-segmentation
2 parents: 4f77514 + 54b8bbf

25 files changed (+161, -68 lines)

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions

@@ -1,12 +1,12 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.2.2"
+    rev: "v0.11.0"
     hooks:
       - id: ruff
         args: ["--fix"]
       - id: ruff-format
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v5.0.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer

docs/analysis_modules.md

Lines changed: 5 additions & 5 deletions

@@ -834,11 +834,11 @@ rfm_segmenter = RFMSegmentation(df=data, current_date=current_date)
 rfm_results = rfm_segmenter.df
 ```

-| customer_id | recency_days | frequency | monetary | r_score | f_score | m_score | rfm_segment |
-|-------------|--------------|-----------|----------|---------|---------|---------|-------------|
-| 3 | 147 | 3 | 750 | 0 | 0 | 0 | 0 |
-| 2 | 127 | 2 | 250 | 1 | 2 | 1 | 121 |
-| 1 | 113 | 2 | 125 | 2 | 1 | 2 | 212 |
+| customer_id | recency_days | frequency | monetary | r_score | f_score | m_score | rfm_segment | fm_segment |
+|-------------|--------------|-----------|----------|---------|---------|---------|-------------|------------|
+| 1 | 113 | 2 | 125 | 0 | 0 | 0 | 0 | 0 |
+| 2 | 127 | 2 | 250 | 1 | 1 | 1 | 111 | 11 |
+| 3 | 147 | 3 | 750 | 2 | 2 | 2 | 222 | 22 |

 ### Purchases Per Customer

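For context, the rfm_segment and fm_segment codes in the updated table are digit concatenations of the decile scores, the same arithmetic RFMSegmentation._compute_rfm performs in the segmentation.py diff below: rfm_segment = r_score * 100 + f_score * 10 + m_score, and fm_segment = f_score * 10 + m_score. A minimal sketch with plain integers standing in for the Ibis ntile scores (the helper name is illustrative):

```python
# Minimal sketch of the segment-code arithmetic; plain ints stand in for the
# ibis ntile() decile scores computed by RFMSegmentation._compute_rfm.
def segment_codes(r_score: int, f_score: int, m_score: int) -> tuple[int, int]:
    rfm_segment = r_score * 100 + f_score * 10 + m_score
    fm_segment = f_score * 10 + m_score
    return rfm_segment, fm_segment


assert segment_codes(1, 1, 1) == (111, 11)  # customer_id 2 in the table above
assert segment_codes(2, 2, 2) == (222, 22)  # customer_id 3 in the table above
```
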
docs/examples/cross_shop.ipynb

Lines changed: 3 additions & 1 deletion

@@ -238,7 +238,9 @@
 "source": [
 "shoes_idx = df[\"category_1_name\"] == \"Shoes\"\n",
 "df.loc[shoes_idx, \"category_1_name\"] = np.random.RandomState(42).choice(\n",
-"    [\"Shoes\", \"Jeans\"], size=shoes_idx.sum(), p=[0.5, 0.5],\n",
+"    [\"Shoes\", \"Jeans\"],\n",
+"    size=shoes_idx.sum(),\n",
+"    p=[0.5, 0.5],\n",
 ")"
 ]
 },
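
The reformatted cell keeps its behaviour: it relabels a reproducible random half of the "Shoes" rows as "Jeans" so the cross-shop example has overlapping categories. A standalone sketch of the same pattern (the tiny frame below is illustrative; the notebook's `df` is defined earlier in that example):

```python
import numpy as np
import pandas as pd

# Illustrative frame; the real notebook operates on its own example dataset.
df = pd.DataFrame({"category_1_name": ["Shoes"] * 6 + ["Socks"] * 2})

# Relabel a reproducible random half of the "Shoes" rows as "Jeans".
shoes_idx = df["category_1_name"] == "Shoes"
df.loc[shoes_idx, "category_1_name"] = np.random.RandomState(42).choice(
    ["Shoes", "Jeans"],
    size=shoes_idx.sum(),
    p=[0.5, 0.5],
)

print(df["category_1_name"].value_counts())
```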

docs/examples/gain_loss.ipynb

Lines changed: 3 additions & 1 deletion

@@ -254,7 +254,9 @@
 "# Reasign half the rows to Calvin Klein and leave the other half as Diesel\n",
 "p2_diesel_idx = time_period_2 & (df[\"brand_name\"] == \"Diesel\")\n",
 "df.loc[p2_diesel_idx, \"brand_name\"] = np.random.RandomState(42).choice(\n",
-"    [\"Calvin Klein\", \"Diesel\"], size=p2_diesel_idx.sum(), p=[0.75, 0.25],\n",
+"    [\"Calvin Klein\", \"Diesel\"],\n",
+"    size=p2_diesel_idx.sum(),\n",
+"    p=[0.75, 0.25],\n",
 ")\n",
 "\n",
 "# Apply a 20% discount to Calvin Klein products and increase the quantity by 50%\n",

docs/examples/segmentation.ipynb

Lines changed: 4 additions & 4 deletions

@@ -701,10 +701,10 @@
 "    },\n",
 "    color=\"black\",\n",
 "    bbox={\n",
-"        \"facecolor\":\"white\",\n",
-"        \"edgecolor\":\"white\",\n",
-"        \"boxstyle\":\"round,rounding_size=0.75\",\n",
-"        \"pad\":0.75,\n",
+"        \"facecolor\": \"white\",\n",
+"        \"edgecolor\": \"white\",\n",
+"        \"boxstyle\": \"round,rounding_size=0.75\",\n",
+"        \"pad\": 0.75,\n",
 "    },\n",
 "    linespacing=1.5,\n",
 ")\n",

pyretailscience/analysis/cross_shop.py

Lines changed: 0 additions & 1 deletion

@@ -1,6 +1,5 @@
 """This module contains the CrossShop class that is used to create a cross-shop diagram."""

-
 import ibis
 import matplotlib.pyplot as plt
 import pandas as pd

pyretailscience/analysis/haversine.py

Lines changed: 1 addition & 0 deletions

@@ -21,6 +21,7 @@
 - **Requires Ibis-Compatible Backend**: Ensure your Ibis backend supports trigonometric functions.
 - **Assumes Spherical Earth**: Uses the Haversine formula, which introduces slight inaccuracies due to Earth's oblate shape.
 """
+
 import ibis


pyretailscience/analysis/segmentation.py

Lines changed: 36 additions & 17 deletions

@@ -193,7 +193,7 @@ class SegTransactionStats:
     def __init__(
         self,
         data: pd.DataFrame | ibis.Table,
-        segment_col: str = "segment_name",
+        segment_col: str | list[str] = "segment_name",
         extra_aggs: dict[str, tuple[str, str]] | None = None,
     ) -> None:
         """Calculates transaction statistics by segment.
@@ -203,7 +203,8 @@ def __init__(
                 customer_id, unit_spend and transaction_id. If the dataframe contains the column unit_quantity, then
                 the columns unit_spend and unit_quantity are used to calculate the price_per_unit and
                 units_per_transaction.
-            segment_col (str, optional): The column to use for the segmentation. Defaults to "segment_name".
+            segment_col (str | list[str], optional): The column or list of columns to use for the segmentation.
+                Defaults to "segment_name".
             extra_aggs (dict[str, tuple[str, str]], optional): Additional aggregations to perform.
                 The keys in the dictionary will be the column names for the aggregation results.
                 The values are tuples with (column_name, aggregation_function), where:
@@ -212,11 +213,14 @@ def __init__(
                 Example: {"stores": ("store_id", "nunique")} would count unique store_ids.
         """
         cols = ColumnHelper()
+
+        if isinstance(segment_col, str):
+            segment_col = [segment_col]
         required_cols = [
             cols.customer_id,
             cols.unit_spend,
             cols.transaction_id,
-            segment_col,
+            *segment_col,
         ]
         if cols.unit_qty in data.columns:
             required_cols.append(cols.unit_qty)
@@ -274,14 +278,14 @@ def _get_col_order(include_quantity: bool) -> list[str]:
     @staticmethod
     def _calc_seg_stats(
         data: pd.DataFrame | ibis.Table,
-        segment_col: str,
+        segment_col: list[str],
         extra_aggs: dict[str, tuple[str, str]] | None = None,
     ) -> ibis.Table:
         """Calculates the transaction statistics by segment.

         Args:
             data (pd.DataFrame | ibis.Table): The transaction data.
-            segment_col (str): The column to use for the segmentation.
+            segment_col (list[str]): The columns to use for the segmentation.
             extra_aggs (dict[str, tuple[str, str]], optional): Additional aggregations to perform.
                 The keys in the dictionary will be the column names for the aggregation results.
                 The values are tuples with (column_name, aggregation_function).
@@ -315,7 +319,7 @@ def _calc_seg_stats(

         # Calculate metrics for segments and total
         segment_metrics = data.group_by(segment_col).aggregate(**aggs)
-        total_metrics = data.aggregate(**aggs).mutate(segment_name=ibis.literal("Total"))
+        total_metrics = data.aggregate(**aggs).mutate({col: ibis.literal("Total") for col in segment_col})
         total_customers = data[cols.customer_id].nunique()

         # Cross join with total_customers to make it available for percentage calculation
@@ -344,7 +348,7 @@ def df(self) -> pd.DataFrame:
         if self._df is None:
             cols = ColumnHelper()
             col_order = [
-                self.segment_col,
+                *self.segment_col,
                 *SegTransactionStats._get_col_order(include_quantity=cols.agg_unit_qty in self.table.columns),
             ]

@@ -393,18 +397,23 @@ def plot(
         Raises:
             ValueError: If the sort_order is not "ascending", "descending" or None.
             ValueError: If the orientation is not "vertical" or "horizontal".
+            ValueError: If multiple segment columns are used, as plotting is only supported for a single segment column.
         """
         if sort_order not in ["ascending", "descending", None]:
             raise ValueError("sort_order must be either 'ascending' or 'descending' or None")
         if orientation not in ["vertical", "horizontal"]:
             raise ValueError("orientation must be either 'vertical' or 'horizontal'")
+        if len(self.segment_col) > 1:
+            raise ValueError("Plotting is only supported for a single segment column")

         default_title = f"{value_col.title()} by Segment"
         kind = "bar"
         if orientation == "horizontal":
             kind = "barh"

-        val_s = self.df.set_index(self.segment_col)[value_col]
+        # Use the first segment column for plotting
+        plot_segment_col = self.segment_col[0]
+        val_s = self.df.set_index(plot_segment_col)[value_col]
         if hide_total:
             val_s = val_s[val_s.index != "Total"]

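Taken together, the SegTransactionStats changes above let segment_col be a single column name or a list: stats are grouped by the column combination, the "Total" row writes "Total" into every segment column, and plot() now rejects multiple segment columns. A hedged usage sketch; the toy frame and the extra "region" column are illustrative, while customer_id, unit_spend and transaction_id follow the docstring above:

```python
import pandas as pd

from pyretailscience.analysis.segmentation import SegTransactionStats

# Toy transactions; "region" is an illustrative second segment column.
df = pd.DataFrame(
    {
        "customer_id": [1, 1, 2, 3, 3],
        "transaction_id": [101, 102, 103, 104, 105],
        "unit_spend": [20.0, 35.0, 10.0, 50.0, 15.0],
        "segment_name": ["Light", "Light", "Heavy", "Heavy", "Heavy"],
        "region": ["North", "North", "South", "South", "North"],
    }
)

# A plain string still works; a list groups by the combination of columns.
stats = SegTransactionStats(df, segment_col=["segment_name", "region"])
print(stats.df)

# stats.plot(...) would raise ValueError here, since plotting is only
# supported for a single segment column.
```
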
@@ -462,7 +471,7 @@ class RFMSegmentation:

     _df: pd.DataFrame | None = None

-    def __init__(self, df: pd.DataFrame | ibis.Table, current_date: str | None = None) -> None:
+    def __init__(self, df: pd.DataFrame | ibis.Table, current_date: str | datetime.date | None = None) -> None:
         """Initializes the RFM segmentation process.

         Args:
@@ -472,8 +481,8 @@ def __init__(self, df: pd.DataFrame | ibis.Table, current_date: str | None = Non
                 - transaction_date
                 - unit_spend
                 - transaction_id
-            current_date (Optional[str]): The reference date for calculating recency (format: "YYYY-MM-DD").
-                If not provided, the current system date will be used.
+            current_date (Optional[Union[str, datetime.date]]): The reference date for calculating recency.
+                Can be a string (format: "YYYY-MM-DD"), a date object, or None (defaults to the current system date).

         Raises:
             ValueError: If the dataframe is missing required columns.
@@ -491,9 +500,13 @@ def __init__(self, df: pd.DataFrame | ibis.Table, current_date: str | None = Non
         if missing_cols:
             error_message = f"Missing required columns: {missing_cols}"
             raise ValueError(error_message)
-        current_date = (
-            datetime.date.fromisoformat(current_date) if current_date else datetime.datetime.now(datetime.UTC).date()
-        )
+
+        if isinstance(current_date, str):
+            current_date = datetime.date.fromisoformat(current_date)
+        elif current_date is None:
+            current_date = datetime.datetime.now(datetime.UTC).date()
+        elif not isinstance(current_date, datetime.date):
+            raise TypeError("current_date must be a string in 'YYYY-MM-DD' format, a datetime.date object, or None")

         self.table = self._compute_rfm(df, current_date)

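With the reworked type handling, the reference date may be an ISO-format string, a datetime.date, or None; any other type now fails fast with a TypeError instead of erroring later. A brief sketch (the toy frame is illustrative; the required columns follow the docstring above):

```python
import datetime

import pandas as pd

from pyretailscience.analysis.segmentation import RFMSegmentation

# Illustrative transactions with the required columns.
df = pd.DataFrame(
    {
        "customer_id": [1, 1, 2],
        "transaction_id": [101, 102, 103],
        "transaction_date": pd.to_datetime(["2025-01-05", "2025-02-10", "2025-02-20"]),
        "unit_spend": [50.0, 75.0, 20.0],
    }
)

RFMSegmentation(df=df, current_date="2025-03-01")               # ISO string
RFMSegmentation(df=df, current_date=datetime.date(2025, 3, 1))  # date object
RFMSegmentation(df=df)                                          # None -> current UTC date
# RFMSegmentation(df=df, current_date=20250301)                 # would raise TypeError
```
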
@@ -537,13 +550,19 @@ def _compute_rfm(self, df: ibis.Table, current_date: datetime.date) -> ibis.Tabl
             m_score=(ibis.ntile(10).over(window_monetary)),
         )

-        rfm_segment = (rfm_scores.r_score * 100 + rfm_scores.f_score * 10 + rfm_scores.m_score).name("rfm_segment")
-
-        return rfm_scores.mutate(rfm_segment=rfm_segment)
+        return rfm_scores.mutate(
+            rfm_segment=(rfm_scores.r_score * 100 + rfm_scores.f_score * 10 + rfm_scores.m_score),
+            fm_segment=(rfm_scores.f_score * 10 + rfm_scores.m_score),
+        )

     @property
     def df(self) -> pd.DataFrame:
         """Returns the dataframe with the segment names."""
         if self._df is None:
             self._df = self.table.execute().set_index(get_option("column.customer_id"))
         return self._df
+
+    @property
+    def ibis_table(self) -> ibis.Table:
+        """Returns the computed Ibis table with RFM segmentation."""
+        return self.table
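
The new ibis_table property exposes the lazy Ibis expression alongside the materialised .df, so downstream filtering, for example on the new fm_segment column, can stay in the backend. A hedged sketch reusing the illustrative frame from the previous example:

```python
import pandas as pd

from pyretailscience.analysis.segmentation import RFMSegmentation

# Same illustrative frame as the previous sketch.
df = pd.DataFrame(
    {
        "customer_id": [1, 1, 2],
        "transaction_id": [101, 102, 103],
        "transaction_date": pd.to_datetime(["2025-01-05", "2025-02-10", "2025-02-20"]),
        "unit_spend": [50.0, 75.0, 20.0],
    }
)

seg = RFMSegmentation(df=df, current_date="2025-03-01")

# Stay lazy in Ibis: filter on the new fm_segment column before executing.
frequent_high_spenders = seg.ibis_table.filter(seg.ibis_table.fm_segment >= 11)
print(frequent_high_spenders.execute())

# .df still materialises the full pandas result, indexed by customer_id.
print(seg.df)
```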

pyretailscience/plots/time.py

Lines changed: 0 additions & 1 deletion

@@ -33,7 +33,6 @@
 - **Helper functions**: Utilizes utility functions from the `pyretailscience` package to handle styling, formatting, and other plot adjustments.
 """

-
 import numpy as np
 import pandas as pd
 from matplotlib.axes import Axes, SubplotBase

pyretailscience/plots/venn.py

Lines changed: 1 addition & 0 deletions

@@ -22,6 +22,7 @@
 - **Pre-Aggregated Data Required**: The module does not perform data aggregation; input data should already be structured correctly.

 """
+
 from collections.abc import Callable

 import pandas as pd

tests/analysis/test_cross_shop.py

Lines changed: 1 addition & 1 deletion

@@ -9,7 +9,7 @@
 cols = ColumnHelper()


-@pytest.fixture()
+@pytest.fixture
 def sample_data():
     """Sample data for testing."""
     return pd.DataFrame(

tests/analysis/test_haversine.py

Lines changed: 2 additions & 1 deletion

@@ -1,12 +1,13 @@
 """Tests for the haversine distance module."""
+
 import ibis
 import pandas as pd
 import pytest

 from pyretailscience.analysis.haversine import haversine_distance


-@pytest.fixture()
+@pytest.fixture
 def sample_ibis_table():
     """Fixture to provide a sample Ibis table for testing."""
     data = {

tests/analysis/test_product_association.py

Lines changed: 3 additions & 3 deletions

@@ -12,7 +12,7 @@
 class TestProductAssociations:
     """Tests for the ProductAssociations class."""

-    @pytest.fixture()
+    @pytest.fixture
     def transactions_df(self) -> pd.DataFrame:
         """Return a sample DataFrame for testing."""
         # fmt: off
@@ -23,7 +23,7 @@ def transactions_df(self) -> pd.DataFrame:
         })
         # fmt: on

-    @pytest.fixture()
+    @pytest.fixture
     def expected_results_single_items_df(self) -> pd.DataFrame:
         """Return the expected results for the single items association analysis."""
         # fmt: off
@@ -58,7 +58,7 @@ def expected_results_single_items_df(self) -> pd.DataFrame:
         )
         # fmt: on

-    @pytest.fixture()
+    @pytest.fixture
     def expected_results_pair_items_df(self) -> pd.DataFrame:
         """Return the expected results for the pair items association analysis."""
         # fmt: off

tests/analysis/test_revenue_tree.py

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 class TestRevenueTree:
     """Test the RevenueTree class."""

-    @pytest.fixture()
+    @pytest.fixture
     def cols(self):
         """Return a ColumnHelper instance."""
         return ColumnHelper()
