41
41
42
42
from pyretailscience .options import get_option
43
43
44
- SUPPORTED_COMBINATIONS = 2
45
-
46
44
47
45
class ProductAssociation :
48
46
"""A class for generating and analyzing product association rules.
@@ -143,7 +141,7 @@ def __init__(
143
141
)
144
142
145
143
@staticmethod
146
- def _calc_association ( # (ignore complexity) - Excluded due to min_* arguments checks
144
+ def _calc_association (
147
145
df : pd .DataFrame | ibis .Table ,
148
146
value_col : str ,
149
147
group_col : str = get_option ("column.customer_id" ),
@@ -209,40 +207,34 @@ def _calc_association( # (ignore complexity) - Excluded due to min_* arguments
209
207
if isinstance (df , pd .DataFrame ):
210
208
df = ibis .memtable (df )
211
209
212
- unique_transactions = (
213
- df .group_by (group_col ).aggregate (products = lambda t , col = value_col : t [col ].collect ()).order_by (group_col )
214
- )
215
- unique_transactions = unique_transactions .mutate (
216
- item = ibis .expr .operations .Unnest (unique_transactions ["products" ]),
217
- ).drop ("products" )
210
+ unique_transactions = df .select (df [group_col ], df [value_col ].name ("item" )).distinct ()
218
211
219
212
total_transactions = unique_transactions [group_col ].nunique ().execute ()
220
-
221
213
product_occurrences = (
222
214
unique_transactions .group_by ("item" )
223
- .aggregate (occurrences = lambda t , col = group_col : t [ col ]. nunique ())
224
- . order_by ( "item" )
225
- )
226
- product_occurrences = product_occurrences . mutate (
227
- occurrence_probability = product_occurrences [ " occurrences" ] / total_transactions ,
215
+ .aggregate (
216
+ occurrences = lambda t , col = group_col : t [ col ]. nunique (),
217
+ occurrence_probability = lambda t , col = group_col : t [ col ]. nunique () / total_transactions ,
218
+ )
219
+ . filter ( lambda t : t . occurrences >= min_occurrences )
228
220
)
229
- product_occurrences = product_occurrences .filter (product_occurrences ["occurrences" ] >= min_occurrences )
230
221
231
222
left_table = unique_transactions .rename ({"item_1" : "item" })
232
223
right_table = unique_transactions .rename ({"item_2" : "item" })
233
224
234
- merged_df = ibis .join (
235
- left_table ,
225
+ merged_df = left_table .join (
236
226
right_table ,
237
- predicates = [left_table [group_col ] == right_table [group_col ]],
227
+ predicates = [
228
+ left_table [group_col ] == right_table [group_col ],
229
+ left_table ["item_1" ] < right_table ["item_2" ],
230
+ ],
238
231
)
239
- merged_df = merged_df .filter (merged_df ["item_1" ] < merged_df ["item_2" ])
240
232
241
233
product_occurrences_1 = product_occurrences .rename (
242
- {"item_1" : "item" , "occurrences_x " : "occurrences" , "occurrence_probability_x " : "occurrence_probability" },
234
+ {"item_1" : "item" , "occurrences_1 " : "occurrences" , "occurrence_probability_1 " : "occurrence_probability" },
243
235
)
244
236
product_occurrences_2 = product_occurrences .rename (
245
- {"item_2" : "item" , "occurrences_y " : "occurrences" , "occurrence_probability_y " : "occurrence_probability" },
237
+ {"item_2" : "item" , "occurrences_2 " : "occurrences" , "occurrence_probability_2 " : "occurrence_probability" },
246
238
)
247
239
248
240
merged_df = ibis .join (
@@ -255,13 +247,9 @@ def _calc_association( # (ignore complexity) - Excluded due to min_* arguments
255
247
merged_df ,
256
248
product_occurrences_2 ,
257
249
predicates = [merged_df ["item_2" ] == product_occurrences_2 ["item_2" ]],
258
- ).order_by ([group_col , "item_1" , "item_2" ])
259
-
260
- cooccurrences = (
261
- merged_df .group_by (["item_1" , "item_2" ])
262
- .aggregate (cooccurrences = merged_df [group_col ].nunique ())
263
- .order_by (["item_1" , "cooccurrences" ])
264
250
)
251
+
252
+ cooccurrences = merged_df .group_by (["item_1" , "item_2" ]).aggregate (cooccurrences = merged_df [group_col ].nunique ())
265
253
cooccurrences = cooccurrences .mutate (
266
254
total_count = total_transactions ,
267
255
support = cooccurrences .cooccurrences / total_transactions ,
@@ -286,7 +274,7 @@ def _calc_association( # (ignore complexity) - Excluded due to min_* arguments
286
274
product_pairs ,
287
275
product_occurrences_2_rename ,
288
276
predicates = [product_pairs ["item_2" ] == product_occurrences_2_rename ["item_2" ]],
289
- ). order_by ([ "item_1" , "item_2" ])
277
+ )
290
278
291
279
product_pairs = product_pairs .mutate (
292
280
confidence = product_pairs ["cooccurrences" ] / product_pairs ["occurrences_1" ],
0 commit comments