Skip to content

Commit d350ccb

Browse files
authored
Merge pull request #149 from Data-Simply/feature/product-association
product association
2 parents 919ad00 + 8133202 commit d350ccb

File tree

2 files changed

+38
-21
lines changed

2 files changed

+38
-21
lines changed

pyretailscience/analysis/product_association.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ class ProductAssociation:
8080
- uplift: The ratio of the observed support to the expected support if the products were independent.
8181
"""
8282

83+
_df: pd.DataFrame | None = None
84+
8385
def __init__(
8486
self,
8587
df: pd.DataFrame | ibis.Table,
@@ -124,7 +126,7 @@ def __init__(
124126
msg = f"The following columns are required but missing: {missing_cols}"
125127
raise ValueError(msg)
126128

127-
self.df = self._calc_association(
129+
self.table = self._calc_association(
128130
df=df,
129131
value_col=value_col,
130132
group_col=group_col,
@@ -305,10 +307,13 @@ def _calc_association(
305307
result = result[col_order].union(inverse_pairs[col_order])
306308

307309
result = result.filter(result.confidence >= min_confidence)
308-
309-
final_result = result.execute().sort_values(by=["item_1", "item_2"]).reset_index(drop=True)
310-
final_result = final_result.rename(columns={"item_1": f"{value_col}_1", "item_2": f"{value_col}_2"})
311-
310+
final_result = result.order_by(["item_1", "item_2"])
311+
final_result = final_result.rename(
312+
{
313+
f"{value_col}_1": "item_1",
314+
f"{value_col}_2": "item_2",
315+
},
316+
)
312317
return final_result[
313318
[
314319
f"{value_col}_1",
@@ -321,3 +326,10 @@ def _calc_association(
321326
"uplift",
322327
]
323328
]
329+
330+
@property
331+
def df(self) -> pd.DataFrame:
332+
"""Returns the executed DataFrame."""
333+
if self._df is None:
334+
self._df = self.table.execute().reset_index(drop=True)
335+
return self._df

tests/analysis/test_product_association.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -106,27 +106,27 @@ def expected_results_pair_items_df(self) -> pd.DataFrame:
106106

107107
def test_calc_association_all_single_items(self, transactions_df, expected_results_single_items_df):
108108
"""Test calculating association rules for a single item versus another item, for all items."""
109-
associations_df = ProductAssociation._calc_association(
109+
associations_df = ProductAssociation(
110110
df=transactions_df,
111111
value_col="product",
112112
group_col=cols.transaction_id,
113113
)
114-
115-
pd.testing.assert_frame_equal(associations_df, expected_results_single_items_df)
114+
result = associations_df.df
115+
pd.testing.assert_frame_equal(result, expected_results_single_items_df)
116116

117117
def test_calc_association_target_single_items(self, transactions_df, expected_results_single_items_df):
118118
"""Test calculating association rules for a target single item versus another item."""
119119
target_item = "bread"
120120

121-
calc_df = ProductAssociation._calc_association(
121+
calc_df = ProductAssociation(
122122
df=transactions_df,
123123
value_col="product",
124124
group_col=cols.transaction_id,
125125
target_item=target_item,
126126
)
127-
127+
result = calc_df.df
128128
pd.testing.assert_frame_equal(
129-
calc_df,
129+
result,
130130
expected_results_single_items_df[expected_results_single_items_df["product_1"] == target_item].reset_index(
131131
drop=True,
132132
),
@@ -136,15 +136,16 @@ def test_calc_association_min_occurrences(self, transactions_df, expected_result
136136
"""Test calculating association rules with a min occurrences level."""
137137
min_occurrences = 2
138138

139-
calc_df = ProductAssociation._calc_association(
139+
calc_df = ProductAssociation(
140140
df=transactions_df,
141141
value_col="product",
142142
group_col=cols.transaction_id,
143143
min_occurrences=min_occurrences,
144144
)
145145

146+
result = calc_df.df
146147
pd.testing.assert_frame_equal(
147-
calc_df,
148+
result,
148149
expected_results_single_items_df[
149150
(expected_results_single_items_df["occurrences_1"] >= min_occurrences)
150151
& (expected_results_single_items_df["occurrences_2"] >= min_occurrences)
@@ -155,15 +156,16 @@ def test_calc_association_min_cooccurrences(self, transactions_df, expected_resu
155156
"""Test calculating association rules with a min cooccurrences level."""
156157
min_cooccurrences = 2
157158

158-
calc_df = ProductAssociation._calc_association(
159+
calc_df = ProductAssociation(
159160
df=transactions_df,
160161
value_col="product",
161162
group_col=cols.transaction_id,
162163
min_cooccurrences=min_cooccurrences,
163164
)
164165

166+
result = calc_df.df
165167
pd.testing.assert_frame_equal(
166-
calc_df,
168+
result,
167169
expected_results_single_items_df[
168170
(expected_results_single_items_df["cooccurrences"] >= min_cooccurrences)
169171
].reset_index(drop=True),
@@ -173,15 +175,16 @@ def test_calc_association_min_support(self, transactions_df, expected_results_si
173175
"""Test calculating association rules with a min support level."""
174176
min_support = 0.25
175177

176-
calc_df = ProductAssociation._calc_association(
178+
calc_df = ProductAssociation(
177179
df=transactions_df,
178180
value_col="product",
179181
group_col=cols.transaction_id,
180182
min_support=min_support,
181183
)
182184

185+
result = calc_df.df
183186
pd.testing.assert_frame_equal(
184-
calc_df,
187+
result,
185188
expected_results_single_items_df[(expected_results_single_items_df["support"] >= min_support)].reset_index(
186189
drop=True,
187190
),
@@ -191,15 +194,16 @@ def test_calc_association_min_confidence(self, transactions_df, expected_results
191194
"""Test calculating association rules with a min confidence level."""
192195
min_confidence = 0.25
193196

194-
calc_df = ProductAssociation._calc_association(
197+
calc_df = ProductAssociation(
195198
df=transactions_df,
196199
value_col="product",
197200
group_col=cols.transaction_id,
198201
min_confidence=min_confidence,
199202
)
200203

204+
result = calc_df.df
201205
pd.testing.assert_frame_equal(
202-
calc_df,
206+
result,
203207
expected_results_single_items_df[
204208
(expected_results_single_items_df["confidence"] >= min_confidence)
205209
].reset_index(drop=True),
@@ -209,15 +213,16 @@ def test_calc_association_min_uplift(self, transactions_df, expected_results_sin
209213
"""Test calculating association rules with a min uplift level."""
210214
min_uplift = 1
211215

212-
calc_df = ProductAssociation._calc_association(
216+
calc_df = ProductAssociation(
213217
df=transactions_df,
214218
value_col="product",
215219
group_col=cols.transaction_id,
216220
min_uplift=min_uplift,
217221
)
218222

223+
result = calc_df.df
219224
pd.testing.assert_frame_equal(
220-
calc_df,
225+
result,
221226
expected_results_single_items_df[(expected_results_single_items_df["uplift"] >= min_uplift)].reset_index(
222227
drop=True,
223228
),

0 commit comments

Comments
 (0)