Skip to content

Commit 7bec26b

Browse files
committed
fix: change the code to check ibis functionality rather than the output
1 parent f51ea78 commit 7bec26b

14 files changed

+515
-1505
lines changed

.github/workflows/bigquery-integration.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,6 @@ jobs:
5555
with:
5656
credentials_json: ${{ secrets.GCP_SA_KEY }}
5757

58-
- name: Set up Google Cloud SDK
59-
uses: google-github-actions/setup-gcloud@v2
60-
6158
- name: Run Integration Tests
6259
env:
6360
TEST_SUITE: ${{ inputs.test_suite }}

README.md

Lines changed: 2 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -220,18 +220,7 @@ code paths function correctly when connected to BigQuery.
220220

221221
## Test Coverage
222222

223-
The integration tests cover the following analysis modules:
224-
225-
- **Cohort Analysis** - Tests customer cohort retention metrics
226-
- **Cross Shop Analysis** - Tests product/category cross-shopping patterns
227-
- **Customer Analysis** - Tests customer lifetime value and purchase frequency metrics
228-
- **Gain Loss Analysis** - Tests comparative performance analysis
229-
- **Haversine Analysis** - Tests geographic distance calculations
230-
- **Product Association Analysis** - Tests market basket analysis
231-
- **Customer Decision Hierarchy** - Tests customer purchase decision patterns
232-
- **Revenue Tree Analysis** - Tests hierarchical revenue breakdowns
233-
- **Composite Rank Analysis** - Tests weighted ranking of entities
234-
- **Segmentation Analysis** - Tests RFM and value-frequency customer segmentation
223+
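The integration tests exercise the analysis modules against BigQuery, checking that the Ibis code paths run successfully rather than validating the exact output.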
235224

236225
## Prerequisites
237226

@@ -256,8 +245,7 @@ export GCP_PROJECT_ID=your-project-id
256245
- Install dependencies:
257246

258247
```bash
259-
uv pip install -e .
260-
uv pip install "ibis-framework[bigquery]>=10.0.0,<11"
248+
uv sync
261249
```
262250

263251
- Run the tests:
@@ -286,24 +274,3 @@ To run the workflow in GitHub Actions, add these secrets to your repository:
286274

287275
- `GCP_SA_KEY`: The entire JSON content of your GCP service account key file
288276
- `GCP_PROJECT_ID`: Your GCP project ID
289-
290-
## Test Data
291-
292-
The tests expect a BigQuery dataset named `test_data` with a table named `transactions` containing the following columns:
293-
294-
- `transaction_id`
295-
- `transaction_date`
296-
- `transaction_time`
297-
- `customer_id`
298-
- `product_id`
299-
- `product_name`
300-
- `category_0_name`
301-
- `category_0_id`
302-
- `category_1_name`
303-
- `category_1_id`
304-
- `brand_name`
305-
- `brand_id`
306-
- `unit_quantity`
307-
- `unit_cost`
308-
- `unit_spend`
309-
- `store_id`

pyretailscience/segmentation/threshold.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,11 @@ def __init__(
8383
window = ibis.window(order_by=ibis.asc(df[value_col]))
8484
df = df.mutate(ptile=ibis.percent_rank().over(window))
8585

86-
case = ibis.case()
87-
86+
case_args = []
8887
for quantile, segment in zip(thresholds, segments, strict=True):
89-
case = case.when(df["ptile"] <= quantile, segment)
90-
91-
case = case.end()
88+
case_args.append((df["ptile"] <= quantile, segment))
9289

93-
df = df.mutate(segment_name=case).drop(["ptile"])
90+
df = df.mutate(segment_name=ibis.cases(*case_args)).drop(["ptile"])
9491

9592
if zero_value_customers == "separate_segment":
9693
df = ibis.union(df, zero_df)

tests/integration/bigquery/test_cohort_analysis.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,3 @@ def test_invalid_period(self, transactions_table):
3434
aggregation_column="unit_spend",
3535
period=invalid_period,
3636
)
37-
38-
def test_cohort_percentage(self, transactions_table):
39-
"""Tests cohort analysis with percentage=True."""
40-
cohort = CohortAnalysis(
41-
df=transactions_table,
42-
aggregation_column="unit_spend",
43-
agg_func="sum",
44-
period="month",
45-
percentage=True,
46-
)
47-
result = cohort.table
48-
assert not result.empty
49-
assert result.max().max() <= 1.0, "Values should be <= 1 when percentage=True"

tests/integration/bigquery/test_composite_rank.py

Lines changed: 32 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -13,101 +13,71 @@ def test_transactions_df(self, transactions_table):
1313
"""Fetch test transactions data from BigQuery and convert to DataFrame.
1414
1515
This fixture assumes a table with columns such as unit_spend and customer_id, and derives a spend_per_customer column from them.
16-
Modify the query and column names as per your actual BigQuery table structure.
1716
"""
1817
df = transactions_table.to_pandas()
1918

20-
if "spend_per_customer" not in df.columns:
21-
df["spend_per_customer"] = df["unit_spend"] / df["customer_id"]
19+
df["spend_per_customer"] = df["unit_spend"] / df["customer_id"]
2220

2321
return df
2422

25-
def test_composite_rank_with_bigquery_data(self, test_transactions_df):
26-
"""Test CompositeRank functionality with real BigQuery data.
23+
def test_composite_rank_functionality(self, test_transactions_df):
24+
"""Test core CompositeRank functionality with BigQuery data.
2725
28-
This test demonstrates using CompositeRank with BigQuery-sourced data.
26+
This test validates that CompositeRank can be constructed from BigQuery-sourced data with several rank columns without raising an error.
2927
"""
3028
rank_cols = [
3129
("unit_spend", "desc"),
3230
("customer_id", "desc"),
3331
("spend_per_customer", "desc"),
3432
]
3533

36-
cr = CompositeRank(
34+
CompositeRank(
3735
df=test_transactions_df,
3836
rank_cols=rank_cols,
3937
agg_func="mean",
4038
ignore_ties=False,
4139
)
4240

43-
assert "composite_rank" in cr.df.columns
44-
assert len(cr.df) > 0
45-
46-
expected_rank_columns = [
47-
"unit_spend_rank",
48-
"customer_id_rank",
49-
"spend_per_customer_rank",
50-
"composite_rank",
51-
]
52-
for col in expected_rank_columns:
53-
assert col in cr.df.columns
54-
55-
def test_different_agg_functions_with_bigquery(self, test_transactions_df):
56-
"""Test different aggregation functions with BigQuery data."""
57-
agg_functions = ["mean", "sum", "min", "max"]
41+
@pytest.mark.parametrize(
42+
"agg_func",
43+
["mean", "sum", "min", "max"],
44+
)
45+
def test_different_agg_functions_with_bigquery(self, test_transactions_df, agg_func):
46+
"""Test different aggregation functions with BigQuery data.
5847
48+
Args:
49+
test_transactions_df: The test transactions DataFrame
50+
agg_func: The aggregation function to test (parametrized)
51+
"""
5952
rank_cols = [
6053
("unit_spend", "desc"),
6154
("customer_id", "desc"),
6255
("spend_per_customer", "desc"),
6356
]
6457

65-
for agg_func in agg_functions:
66-
cr = CompositeRank(
67-
df=test_transactions_df,
68-
rank_cols=rank_cols,
69-
agg_func=agg_func,
70-
ignore_ties=False,
71-
)
72-
73-
assert "composite_rank" in cr.df.columns
74-
assert len(cr.df) > 0
75-
76-
def test_ignore_ties_with_bigquery(self, test_transactions_df):
77-
"""Test tie-breaking behavior with BigQuery data."""
78-
rank_cols = [("unit_spend", "desc")]
79-
80-
cr_with_ties = CompositeRank(
58+
CompositeRank(
8159
df=test_transactions_df,
8260
rank_cols=rank_cols,
83-
agg_func="mean",
61+
agg_func=agg_func,
8462
ignore_ties=False,
8563
)
8664

87-
cr_no_ties = CompositeRank(
88-
df=test_transactions_df,
89-
rank_cols=rank_cols,
90-
agg_func="mean",
91-
ignore_ties=True,
92-
)
65+
@pytest.mark.parametrize(
66+
"ignore_ties",
67+
[False, True],
68+
)
69+
def test_ignore_ties_with_bigquery(self, test_transactions_df, ignore_ties):
70+
"""Test tie-breaking behavior with BigQuery data.
9371
94-
assert "unit_spend_rank" in cr_with_ties.df.columns
95-
assert "unit_spend_rank" in cr_no_ties.df.columns
72+
Args:
73+
test_transactions_df: The test transactions DataFrame
74+
ignore_ties: Whether to ignore ties when calculating ranks
75+
"""
76+
rank_cols = [("unit_spend", "desc")]
9677

97-
def test_ibis_table_input(self, transactions_table):
98-
"""Explicitly test Ibis table input for CompositeRank."""
99-
cr = CompositeRank(
100-
df=transactions_table,
101-
rank_cols=[("unit_spend", "desc"), ("customer_id", "desc")],
78+
CompositeRank(
79+
df=test_transactions_df,
80+
rank_cols=rank_cols,
10281
agg_func="mean",
103-
ignore_ties=False,
82+
ignore_ties=ignore_ties,
10483
)
105-
106-
expected_columns = [
107-
"unit_spend_rank",
108-
"customer_id_rank",
109-
"composite_rank",
110-
]
111-
112-
for col in expected_columns:
113-
assert col in cr.df.columns

0 commit comments

Comments
 (0)