Commit 89b66d5

Merge branch 'main' of github.com:data-simply/pyretailscience into feature/product-association

2 parents 35f0677 + 54b8bbf

27 files changed: +314 -219 lines

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions

@@ -1,12 +1,12 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.2.2"
+    rev: "v0.11.0"
     hooks:
       - id: ruff
         args: ["--fix"]
       - id: ruff-format
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v5.0.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer

docs/examples/cross_shop.ipynb

Lines changed: 3 additions & 1 deletion

@@ -238,7 +238,9 @@
    "source": [
     "shoes_idx = df[\"category_1_name\"] == \"Shoes\"\n",
     "df.loc[shoes_idx, \"category_1_name\"] = np.random.RandomState(42).choice(\n",
-    "    [\"Shoes\", \"Jeans\"], size=shoes_idx.sum(), p=[0.5, 0.5],\n",
+    "    [\"Shoes\", \"Jeans\"],\n",
+    "    size=shoes_idx.sum(),\n",
+    "    p=[0.5, 0.5],\n",
     ")"
    ]
   },
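
The change is formatting only (one keyword argument per line, likely prompted by the ruff bump above). For context, a self-contained sketch of what the cell does, on hypothetical toy data: each "Shoes" row is relabelled as "Shoes" or "Jeans" with equal probability, with a fixed seed for reproducibility.

    import numpy as np
    import pandas as pd

    # Hypothetical toy frame; the notebook works on a larger simulated dataset.
    df = pd.DataFrame({"category_1_name": ["Shoes", "Shoes", "Shoes", "Shoes", "Socks"]})

    shoes_idx = df["category_1_name"] == "Shoes"
    df.loc[shoes_idx, "category_1_name"] = np.random.RandomState(42).choice(
        ["Shoes", "Jeans"],
        size=shoes_idx.sum(),
        p=[0.5, 0.5],
    )
    print(df["category_1_name"].value_counts())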

docs/examples/gain_loss.ipynb

Lines changed: 3 additions & 1 deletion

@@ -254,7 +254,9 @@
     "# Reasign half the rows to Calvin Klein and leave the other half as Diesel\n",
     "p2_diesel_idx = time_period_2 & (df[\"brand_name\"] == \"Diesel\")\n",
     "df.loc[p2_diesel_idx, \"brand_name\"] = np.random.RandomState(42).choice(\n",
-    "    [\"Calvin Klein\", \"Diesel\"], size=p2_diesel_idx.sum(), p=[0.75, 0.25],\n",
+    "    [\"Calvin Klein\", \"Diesel\"],\n",
+    "    size=p2_diesel_idx.sum(),\n",
+    "    p=[0.75, 0.25],\n",
     ")\n",
     "\n",
     "# Apply a 20% discount to Calvin Klein products and increase the quantity by 50%\n",

docs/examples/segmentation.ipynb

Lines changed: 4 additions & 4 deletions

@@ -701,10 +701,10 @@
     "    },\n",
     "    color=\"black\",\n",
     "    bbox={\n",
-    "        \"facecolor\":\"white\",\n",
-    "        \"edgecolor\":\"white\",\n",
-    "        \"boxstyle\":\"round,rounding_size=0.75\",\n",
-    "        \"pad\":0.75,\n",
+    "        \"facecolor\": \"white\",\n",
+    "        \"edgecolor\": \"white\",\n",
+    "        \"boxstyle\": \"round,rounding_size=0.75\",\n",
+    "        \"pad\": 0.75,\n",
     "    },\n",
     "    linespacing=1.5,\n",
     ")\n",

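Formatting only again (spaces after the dict colons). For context, a minimal sketch of the matplotlib call this cell configures: a text label drawn inside a rounded white box via the `bbox` dict (toy values, not the notebook's full plot).

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.text(
        0.5,
        0.5,
        "Segment label",
        color="black",
        ha="center",
        bbox={
            "facecolor": "white",
            "edgecolor": "white",
            "boxstyle": "round,rounding_size=0.75",  # rounded corners, radius 0.75
            "pad": 0.75,  # padding between the text and the box edge
        },
        linespacing=1.5,
    )
    plt.show()
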
pyproject.toml

Lines changed: 13 additions & 91 deletions

@@ -1,129 +1,51 @@
 [project]
 name = "pyretailscience"
-version = "0.9.0"
+version = "0.10.0"
 description = "Retail Data Science Tools"
 requires-python = ">=3.10,<3.13"
 readme = "README.md"
 license = "Elastic-2.0"
-dependencies = [
-    "pandas>=2.1.4,<3",
-    "pyarrow>=14.0.2,<15",
-    "matplotlib>=3.9.1,<4",
-    "numpy>=1.26.3,<2",
-    "loguru>=0.7.2,<0.8",
-    "tqdm>=4.66.1,<5",
-    "scipy>=1.13.0,<2",
-    "scikit-learn>=1.4.2,<2",
-    "matplotlib-set-diagrams~=0.0.2",
-    "toml>=0.10.2,<0.11",
-    "duckdb>=1.0.0,<2",
-    "graphviz>=0.20.3,<0.21",
-    "ibis-framework[duckdb]>=9.5.0,<10",
-]
+dependencies = [ "pandas>=2.1.4,<3", "pyarrow>=14.0.2,<15", "matplotlib>=3.9.1,<4", "numpy>=1.26.3,<2", "loguru>=0.7.2,<0.8", "tqdm>=4.66.1,<5", "scipy>=1.13.0,<2", "scikit-learn>=1.4.2,<2", "matplotlib-set-diagrams~=0.0.2", "toml>=0.10.2,<0.11", "duckdb>=1.0.0,<2", "graphviz>=0.20.3,<0.21", "ibis-framework[duckdb]>=9.5.0,<10",]
 [[project.authors]]
 name = "Murray Vanwyk"
 
 
 [dependency-groups]
-dev = [
-    "pytest>=8.0.0,<9",
-    "pytest-cov>=4.1.0,<5",
-    "nbstripout>=0.7.1,<0.8",
-    "ruff>=0.9,<0.10",
-    "pre-commit>=3.6.2,<4",
-    "pytest-mock>=3.14.0,<4",
-]
-examples = ["jupyterlab>=4.2.5,<5", "tqdm>=4.66.1,<5"]
-docs = [
-    "mkdocs-material>=9.5.4,<10",
-    "mkdocstrings[python]>=0.24.0,<0.25",
-    "mkdocs>=1.5.3,<2",
-    "mkdocs-jupyter>=0.24.6,<0.25",
-]
+dev = [ "pytest>=8.0.0,<9", "pytest-cov>=4.1.0,<5", "nbstripout>=0.7.1,<0.8", "ruff>=0.9,<0.10", "pre-commit>=3.6.2,<4", "pytest-mock>=3.14.0,<4",]
+examples = [ "jupyterlab>=4.2.5,<5", "tqdm>=4.66.1,<5",]
+docs = [ "mkdocs-material>=9.5.4,<10", "mkdocstrings[python]>=0.24.0,<0.25", "mkdocs>=1.5.3,<2", "mkdocs-jupyter>=0.24.6,<0.25",]
 
 [build-system]
-requires = ["hatchling"]
+requires = [ "hatchling",]
 build-backend = "hatchling.build"
 
 [tool.uv]
-default-groups = ["dev", "examples", "docs"]
+default-groups = [ "dev", "examples", "docs",]
 
 [tool.ruff]
 target-version = "py310"
 line-length = 120
 show-fixes = true
 
 [tool.ruff.lint]
-ignore = ["ANN101", "ANN102", "EM101", "TRY003", "PT011", "PTH123", "SLF001"]
-select = [
-    "A",
-    "ANN",
-    "ARG",
-    "B",
-    "BLE",
-    "C4",
-    "C90",
-    "COM",
-    "D",
-    "D1",
-    "D2",
-    "D3",
-    "D4",
-    "DTZ",
-    "EM",
-    "ERA",
-    "EXE",
-    "F",
-    "FA",
-    "FLY",
-    "G",
-    "I",
-    "ICN",
-    "INP",
-    "INT",
-    "ISC",
-    "N",
-    "NPY",
-    "PERF",
-    "PGH",
-    "PIE",
-    "PL",
-    "PT",
-    "PTH",
-    "PYI",
-    "Q",
-    "RET",
-    "RUF",
-    "RSE",
-    "S",
-    "SIM",
-    "SLF",
-    "SLOT",
-    "T10",
-    "T20",
-    "TCH",
-    "TID",
-    "TRY",
-    "UP",
-    "W",
-    "YTT",
-]
+ignore = [ "ANN101", "ANN102", "EM101", "TRY003", "PT011", "PTH123", "SLF001",]
+select = [ "A", "ANN", "ARG", "B", "BLE", "C4", "C90", "COM", "D", "D1", "D2", "D3", "D4", "DTZ", "EM", "ERA", "EXE", "F", "FA", "FLY", "G", "I", "ICN", "INP", "INT", "ISC", "N", "NPY", "PERF", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "Q", "RET", "RUF", "RSE", "S", "SIM", "SLF", "SLOT", "T10", "T20", "TCH", "TID", "TRY", "UP", "W", "YTT",]
 
 [tool.pytest.ini_options]
 addopts = "--cov=pyretailscience --cov-report=term-missing --cov-branch"
 
 [tool.coverage.run]
 branch = true
-source = ["pyretailscience"]
+source = [ "pyretailscience",]
 
 [tool.coverage.report]
 show_missing = true
 skip_covered = true
 
 [tool.ruff.lint.per-file-ignores]
-"__init__.py" = ["F401", "F403", "F405", "D104"]
-"tests/*" = ["ANN", "ARG", "INP001", "S101", "SLF001"]
-"*.ipynb" = ["T201"]
+"__init__.py" = [ "F401", "F403", "F405", "D104",]
+"tests/*" = [ "ANN", "ARG", "INP001", "S101", "SLF001",]
+"*.ipynb" = [ "T201",]
 
 [tool.ruff.lint.pylint]
 max-args = 15
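
The collapsed single-line arrays with the `[ ...,]` trailing-comma style are the signature of a programmatic TOML round-trip rather than a hand edit. A plausible sketch of how such a rewrite happens, assuming Python's `toml` package (already a project dependency) and an automated version-bump step, neither of which this commit confirms:

    import toml

    # Loading and re-dumping pyproject.toml rewrites multi-line arrays into the
    # single-line "[ ...,]" form seen in this diff (assumed cause, not confirmed).
    with open("pyproject.toml") as f:
        data = toml.load(f)

    data["project"]["version"] = "0.10.0"  # the version bump in this commit

    with open("pyproject.toml", "w") as f:
        toml.dump(data, f)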

pyretailscience/analysis/cross_shop.py

Lines changed: 0 additions & 1 deletion

@@ -1,6 +1,5 @@
 """This module contains the CrossShop class that is used to create a cross-shop diagram."""
 
-
 import ibis
 import matplotlib.pyplot as plt
 import pandas as pd

pyretailscience/analysis/haversine.py

Lines changed: 1 addition & 0 deletions

@@ -21,6 +21,7 @@
 - **Requires Ibis-Compatible Backend**: Ensure your Ibis backend supports trigonometric functions.
 - **Assumes Spherical Earth**: Uses the Haversine formula, which introduces slight inaccuracies due to Earth's oblate shape.
 """
+
 import ibis
 
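
Aside from the blank line after the docstring, nothing changes here. For reference, the Haversine great-circle distance the module is named for, as a plain-Python sketch (not the module's Ibis implementation):

    import math

    def haversine_km(lat1: float, lon1: float, lat2: float, lon2: float, radius_km: float = 6371.0) -> float:
        """Great-circle distance between two points on a sphere, via the Haversine formula."""
        phi1, phi2 = math.radians(lat1), math.radians(lat2)
        dphi = math.radians(lat2 - lat1)
        dlambda = math.radians(lon2 - lon1)
        a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2) ** 2
        return 2 * radius_km * math.asin(math.sqrt(a))

    # New York to London: roughly 5,570 km on a spherical Earth.
    print(round(haversine_km(40.7128, -74.0060, 51.5074, -0.1278)))
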
pyretailscience/analysis/product_association.py

Lines changed: 38 additions & 76 deletions

@@ -35,7 +35,6 @@
 operations, and drive business growth.
 """
 
-
 import ibis
 import pandas as pd
 
@@ -163,7 +162,7 @@ def _calc_association(
         group_col (str, optional): The name of the column that identifies unique transactions or customers. Defaults
             to option column.unit_spend.
         target_item (str or None, optional): A specific product to focus the association analysis on. If None,
-           associations for all products are calculated. Defaults to None.
+            associations for all products are calculated. Defaults to None.
         min_occurrences (int, optional): The minimum number of occurrences required for each product in the
             association analysis. Defaults to 1. Must be at least 1.
         min_cooccurrences (int, optional): The minimum number of co-occurrences required for the product pairs in
@@ -207,7 +206,7 @@ def _calc_association(
     if isinstance(df, pd.DataFrame):
         df = ibis.memtable(df)
 
-    unique_transactions = df
+    unique_transactions = df.select(df[group_col], df[value_col]).distinct()
     total_transactions = unique_transactions.alias("t")[group_col].nunique().name("total_count")
 
     product_occurrences = (
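
The `.distinct()` is the substantive fix here: the old code passed the raw table straight through, so a product appearing twice in the same transaction would be double-counted in the occurrence and co-occurrence tallies. A minimal pandas sketch of the same deduplication, on hypothetical toy data:

    import pandas as pd

    # Toy rows; "bread" is scanned twice in transaction 1 (hypothetical data).
    df = pd.DataFrame(
        {
            "transaction_id": [1, 1, 1, 2],
            "product": ["bread", "bread", "milk", "milk"],
        },
    )

    # Equivalent of df.select(group_col, value_col).distinct() in the diff:
    unique_transactions = df[["transaction_id", "product"]].drop_duplicates()
    print(unique_transactions)  # one row per (transaction, product) pair
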
@@ -219,15 +218,22 @@ def _calc_association(
         .filter(lambda t: t.occurrences >= min_occurrences)
     )
 
-    left_table = unique_transactions.mutate(item_1=unique_transactions[value_col]).drop(value_col)
-    right_table = unique_transactions.mutate(item_2=unique_transactions[value_col]).drop(value_col)
-
+    left_table = unique_transactions.rename({"item_1": value_col})
+    right_table = unique_transactions.rename({"item_2": value_col})
+
+    join_logic = [left_table[group_col] == right_table[group_col]]
+    if target_item is None:
+        join_logic.append(left_table["item_1"] < right_table["item_2"])
+    else:
+        join_logic.extend(
+            [
+                left_table["item_1"] != right_table["item_2"],
+                left_table["item_1"] == target_item,
+            ],
+        )
     merged_df = left_table.join(
         right_table,
-        predicates=[
-            left_table[group_col] == right_table[group_col],
-            left_table["item_1"] < right_table["item_2"],
-        ],
+        predicates=join_logic,
         lname="",
         rname="{name}_right",
     )
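
The join predicates now branch on `target_item`: without a target, `item_1 < item_2` keeps each unordered pair exactly once (and drops self-pairs); with a target, every ordered pair anchored on the target item survives. A minimal pandas sketch of the two modes, on hypothetical toy data ("bread" stands in for `target_item`):

    import pandas as pd

    # One row per (transaction, product) pair, as after the distinct() above.
    df = pd.DataFrame(
        {
            "transaction_id": [1, 1, 1, 2, 2],
            "product": ["bread", "butter", "milk", "bread", "milk"],
        },
    )
    left = df.rename(columns={"product": "item_1"})
    right = df.rename(columns={"product": "item_2"})

    # Self-join on the transaction key pairs every item with every other item
    # bought in the same transaction.
    pairs = left.merge(right, on="transaction_id")

    # target_item is None: "<" keeps each unordered pair once, e.g. (bread, milk)
    # but never (milk, bread) or (bread, bread).
    all_pairs = pairs[pairs["item_1"] < pairs["item_2"]]

    # target_item given: keep all ordered pairs anchored on the target.
    target_pairs = pairs[(pairs["item_1"] != pairs["item_2"]) & (pairs["item_1"] == "bread")]
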
@@ -239,14 +245,12 @@ def _calc_association(
         {"item_2": value_col, "occurrences_2": "occurrences", "occurrence_probability_2": "occurrence_probability"},
     )
 
-    merged_df = ibis.join(
-        merged_df,
+    merged_df = merged_df.join(
         product_occurrences_1,
         predicates=[merged_df["item_1"] == product_occurrences_1["item_1"]],
     )
 
-    merged_df = ibis.join(
-        merged_df,
+    merged_df = merged_df.join(
         product_occurrences_2,
         predicates=[merged_df["item_2"] == product_occurrences_2["item_2"]],
     )
@@ -266,13 +270,11 @@ def _calc_association(
         {"item_2": value_col, "occurrences_2": "occurrences", "prob_2": "occurrence_probability"},
     )
 
-    product_pairs = ibis.join(
-        cooccurrences,
+    product_pairs = cooccurrences.join(
         product_occurrences_1_rename,
         predicates=[cooccurrences["item_1"] == product_occurrences_1_rename["item_1"]],
     )
-    product_pairs = ibis.join(
-        product_pairs,
+    product_pairs = product_pairs.join(
         product_occurrences_2_rename,
         predicates=[product_pairs["item_2"] == product_occurrences_2_rename["item_2"]],
     )
@@ -282,74 +284,34 @@
     product_pairs = product_pairs.mutate(
         uplift=product_pairs["support"] / (product_pairs["prob_1"] * product_pairs["prob_2"]),
     )
 
-    result = product_pairs.filter(
-        (product_pairs.confidence >= min_confidence) & (product_pairs.uplift >= min_uplift),
-    )
-
-    inverse_pairs = result.rename(
-        {
-            f"{value_col}_2": "item_1",
-            f"{value_col}_1": "item_2",
-            "occurrences_2": "occurrences_1",
-            "occurrences_1": "occurrences_2",
-        },
-    )
-
-    product_occurrences_1_rename2 = product_occurrences.rename({f"{value_col}_1": value_col})
-    product_occurrences_2_rename2 = product_occurrences.rename({f"{value_col}_2": value_col})
+    result = product_pairs.filter(product_pairs.uplift >= min_uplift)
 
-    inverse_pairs = ibis.join(
-        inverse_pairs,
-        product_occurrences_1_rename2,
-        predicates=[inverse_pairs[f"{value_col}_1"] == product_occurrences_1_rename2[f"{value_col}_1"]],
-    )
-    inverse_pairs = ibis.join(
-        inverse_pairs,
-        product_occurrences_2_rename2,
-        predicates=[inverse_pairs[f"{value_col}_2"] == product_occurrences_2_rename2[f"{value_col}_2"]],
-    )
-    inverse_pairs = inverse_pairs.mutate(
-        confidence=inverse_pairs["cooccurrences"] / inverse_pairs["occurrences_1"],
-        uplift=inverse_pairs["support"] / (inverse_pairs["prob_1"] * inverse_pairs["prob_2"]),
-    )
-
-    result = result.rename({f"{value_col}_1": "item_1", f"{value_col}_2": "item_2"})
-    result = result[
-        [
-            f"{value_col}_1",
-            f"{value_col}_2",
+    if target_item is None:
+        col_order = [
+            "item_1",
+            "item_2",
             "occurrences_1",
             "occurrences_2",
             "cooccurrences",
             "support",
             "confidence",
             "uplift",
         ]
-    ]
-    inverse_pairs = inverse_pairs[
-        [
-            f"{value_col}_1",
-            f"{value_col}_2",
-            "occurrences_1",
-            "occurrences_2",
-            "cooccurrences",
-            "support",
-            "confidence",
-            "uplift",
-        ]
-    ]
-
-    result = result.execute()
-    inverse_pairs = inverse_pairs.execute()
+        inverse_pairs = result.mutate(
+            item_1=result["item_2"],
+            item_2=result["item_1"],
+            occurrences_1=result["occurrences_2"],
+            occurrences_2=result["occurrences_1"],
+            prob_1=result["prob_2"],
+            prob_2=result["prob_1"],
+            confidence=result["cooccurrences"] / result["occurrences_2"],
+        )
+        result = result[col_order].union(inverse_pairs[col_order])
 
-    final_result = (
-        pd.concat([result, inverse_pairs], ignore_index=True)
-        .sort_values(by=[f"{value_col}_1", f"{value_col}_2"])
-        .reset_index(drop=True)
-    )
+    result = result.filter(result.confidence >= min_confidence)
 
-    if target_item is not None:
-        final_result = final_result[final_result[f"{value_col}_1"] == target_item].reset_index(drop=True)
+    final_result = result.execute().sort_values(by=["item_1", "item_2"]).reset_index(drop=True)
+    final_result = final_result.rename(columns={"item_1": f"{value_col}_1", "item_2": f"{value_col}_2"})
 
     return final_result[
         [