Data-Simply
diff --git a/‎.github/workflows/bigquery-integration.yml‎
Lines changed: 0 additions & 3 deletions b/‎.github/workflows/bigquery-integration.yml‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎README.md‎
Lines changed: 2 additions & 35 deletions b/‎README.md‎
Lines changed: 2 additions & 35 deletions
diff --git a/‎pyretailscience/segmentation/threshold.py‎
Lines changed: 3 additions & 6 deletions b/‎pyretailscience/segmentation/threshold.py‎
Lines changed: 3 additions & 6 deletions
diff --git a/‎tests/integration/bigquery/test_cohort_analysis.py‎
Lines changed: 0 additions & 13 deletions b/‎tests/integration/bigquery/test_cohort_analysis.py‎
Lines changed: 0 additions & 13 deletions
diff --git a/‎tests/integration/bigquery/test_composite_rank.py‎
Lines changed: 32 additions & 62 deletions b/‎tests/integration/bigquery/test_composite_rank.py‎
Lines changed: 32 additions & 62 deletions
@@ -55,9 +55,6 @@ jobs:
         with:
           credentials_json: ${{ secrets.GCP_SA_KEY }}
 
-      - name: Set up Google Cloud SDK
-        uses: google-github-actions/setup-gcloud@v2
-
       - name: Run Integration Tests
         env:
           TEST_SUITE: ${{ inputs.test_suite }}
 
@@ -220,18 +220,7 @@ code paths function correctly when connected to BigQuery.
 
 ## Test Coverage
 
-The integration tests cover the following analysis modules:
-
-- **Cohort Analysis** - Tests customer cohort retention metrics
-- **Cross Shop Analysis** - Tests product/category cross-shopping patterns
-- **Customer Analysis** - Tests customer lifetime value and purchase frequency metrics
-- **Gain Loss Analysis** - Tests comparative performance analysis
-- **Haversine Analysis** - Tests geographic distance calculations
-- **Product Association Analysis** - Tests market basket analysis
-- **Customer Decision Hierarchy** - Tests customer purchase decision patterns
-- **Revenue Tree Analysis** - Tests hierarchical revenue breakdowns
-- **Composite Rank Analysis** - Tests weighted ranking of entities
-- **Segmentation Analysis** - Tests RFM and value-frequency customer segmentation
+The integration tests cover the analysis modules.
 
 ## Prerequisites
 
@@ -256,8 +245,7 @@ export GCP_PROJECT_ID=your-project-id
 - Install dependencies:
 
 ```bash
-uv pip install -e .
-uv pip install "ibis-framework[bigquery]>=10.0.0,<11"
+uv sync
 ```
 
 - Run the tests:
@@ -286,24 +274,3 @@ To run the workflow in GitHub Actions, add these secrets to your repository:
 
 - `GCP_SA_KEY`: The entire JSON content of your GCP service account key file
 - `GCP_PROJECT_ID`: Your GCP project ID
-
-## Test Data
-
-The tests expect a BigQuery dataset named `test_data` with a table named `transactions` containing the following columns:
-
-- `transaction_id`
-- `transaction_date`
-- `transaction_time`
-- `customer_id`
-- `product_id`
-- `product_name`
-- `category_0_name`
-- `category_0_id`
-- `category_1_name`
-- `category_1_id`
-- `brand_name`
-- `brand_id`
-- `unit_quantity`
-- `unit_cost`
-- `unit_spend`
-- `store_id`
@@ -83,14 +83,11 @@ def __init__(
         window = ibis.window(order_by=ibis.asc(df[value_col]))
         df = df.mutate(ptile=ibis.percent_rank().over(window))
 
-        case = ibis.case()
-
+        case_args = []
         for quantile, segment in zip(thresholds, segments, strict=True):
-            case = case.when(df["ptile"] <= quantile, segment)
-
-        case = case.end()
+            case_args.append((df["ptile"] <= quantile, segment))
 
-        df = df.mutate(segment_name=case).drop(["ptile"])
+        df = df.mutate(segment_name=ibis.cases(*case_args)).drop(["ptile"])
 
         if zero_value_customers == "separate_segment":
             df = ibis.union(df, zero_df)
 
@@ -34,16 +34,3 @@ def test_invalid_period(self, transactions_table):
                 aggregation_column="unit_spend",
                 period=invalid_period,
             )
-
-    def test_cohort_percentage(self, transactions_table):
-        """Tests cohort analysis with percentage=True."""
-        cohort = CohortAnalysis(
-            df=transactions_table,
-            aggregation_column="unit_spend",
-            agg_func="sum",
-            period="month",
-            percentage=True,
-        )
-        result = cohort.table
-        assert not result.empty
-        assert result.max().max() <= 1.0, "Values should be <= 1 when percentage=True"
@@ -13,101 +13,71 @@ def test_transactions_df(self, transactions_table):
         """Fetch test transactions data from BigQuery and convert to DataFrame.
 
         This fixture assumes a table with columns like product_id, spend, customers, etc.
-        Modify the query and column names as per your actual BigQuery table structure.
         """
         df = transactions_table.to_pandas()
 
-        if "spend_per_customer" not in df.columns:
-            df["spend_per_customer"] = df["unit_spend"] / df["customer_id"]
+        df["spend_per_customer"] = df["unit_spend"] / df["customer_id"]
 
         return df
 
-    def test_composite_rank_with_bigquery_data(self, test_transactions_df):
-        """Test CompositeRank functionality with real BigQuery data.
+    def test_composite_rank_functionality(self, test_transactions_df):
+        """Test core CompositeRank functionality with BigQuery data.
 
-        This test demonstrates using CompositeRank with BigQuery-sourced data.
+        This test validates the basic functionality of CompositeRank including:
         """
         rank_cols = [
             ("unit_spend", "desc"),
             ("customer_id", "desc"),
             ("spend_per_customer", "desc"),
         ]
 
-        cr = CompositeRank(
+        CompositeRank(
             df=test_transactions_df,
             rank_cols=rank_cols,
             agg_func="mean",
             ignore_ties=False,
         )
 
-        assert "composite_rank" in cr.df.columns
-        assert len(cr.df) > 0
-
-        expected_rank_columns = [
-            "unit_spend_rank",
-            "customer_id_rank",
-            "spend_per_customer_rank",
-            "composite_rank",
-        ]
-        for col in expected_rank_columns:
-            assert col in cr.df.columns
-
-    def test_different_agg_functions_with_bigquery(self, test_transactions_df):
-        """Test different aggregation functions with BigQuery data."""
-        agg_functions = ["mean", "sum", "min", "max"]
+    @pytest.mark.parametrize(
+        "agg_func",
+        ["mean", "sum", "min", "max"],
+    )
+    def test_different_agg_functions_with_bigquery(self, test_transactions_df, agg_func):
+        """Test different aggregation functions with BigQuery data.
 
+        Args:
+            test_transactions_df: The test transactions DataFrame
+            agg_func: The aggregation function to test (parametrized)
+        """
         rank_cols = [
             ("unit_spend", "desc"),
             ("customer_id", "desc"),
             ("spend_per_customer", "desc"),
         ]
 
-        for agg_func in agg_functions:
-            cr = CompositeRank(
-                df=test_transactions_df,
-                rank_cols=rank_cols,
-                agg_func=agg_func,
-                ignore_ties=False,
-            )
-
-            assert "composite_rank" in cr.df.columns
-            assert len(cr.df) > 0
-
-    def test_ignore_ties_with_bigquery(self, test_transactions_df):
-        """Test tie-breaking behavior with BigQuery data."""
-        rank_cols = [("unit_spend", "desc")]
-
-        cr_with_ties = CompositeRank(
+        CompositeRank(
             df=test_transactions_df,
             rank_cols=rank_cols,
-            agg_func="mean",
+            agg_func=agg_func,
             ignore_ties=False,
         )
 
-        cr_no_ties = CompositeRank(
-            df=test_transactions_df,
-            rank_cols=rank_cols,
-            agg_func="mean",
-            ignore_ties=True,
-        )
+    @pytest.mark.parametrize(
+        "ignore_ties",
+        [False, True],
+    )
+    def test_ignore_ties_with_bigquery(self, test_transactions_df, ignore_ties):
+        """Test tie-breaking behavior with BigQuery data.
 
-        assert "unit_spend_rank" in cr_with_ties.df.columns
-        assert "unit_spend_rank" in cr_no_ties.df.columns
+        Args:
+            test_transactions_df: The test transactions DataFrame
+            ignore_ties: Whether to ignore ties when calculating ranks
+        """
+        rank_cols = [("unit_spend", "desc")]
 
-    def test_ibis_table_input(self, transactions_table):
-        """Explicitly test Ibis table input for CompositeRank."""
-        cr = CompositeRank(
-            df=transactions_table,
-            rank_cols=[("unit_spend", "desc"), ("customer_id", "desc")],
+        CompositeRank(
+            df=test_transactions_df,
+            rank_cols=rank_cols,
             agg_func="mean",
-            ignore_ties=False,
+            ignore_ties=ignore_ties,
         )
-
-        expected_columns = [
-            "unit_spend_rank",
-            "customer_id_rank",
-            "composite_rank",
-        ]
-
-        for col in expected_columns:
-            assert col in cr.df.columns