Skip to content

Commit 706f0c7

Browse files
committed
FEAT: deprecate target aggregated label via the group that created it (closes larray-project#994)
1 parent 48b5cf0 commit 706f0c7

File tree

3 files changed

+105
-31
lines changed

3 files changed

+105
-31
lines changed

doc/source/changes/version_0_34.rst.inc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,34 @@ Syntax changes
1515

1616
* renamed ``Array.sort_axes()`` to :py:obj:`Array.sort_labels()` (closes :issue:`861`).
1717

18+
* deprecated the ability to target a label in an aggregated array using the group that created it.
19+
The aggregated array label should be used instead. This is a seldom used feature which is complex
20+
to keep working and has a significant performance cost in some cases, even when the feature is not used
21+
(closes :issue:`994`).
22+
23+
In other words, the following code will now raise a warning:
24+
25+
>>> arr = ndtest(4)
26+
>>> arr
27+
a a0 a1 a2 a3
28+
0 1 2 3
29+
>>> group1 = arr.a['a0', 'a2'] >> 'a0_a2'
30+
>>> group2 = arr.a['a1', 'a3'] >> 'a1_a3'
31+
>>> agg_arr = arr.sum((group1, group2))
32+
>>> agg_arr
33+
a a0_a2 a1_a3
34+
2 4
35+
>>> agg_arr[group1]
36+
FutureWarning: Using a Group object which was used to create an aggregate to target its aggregated label is deprecated.
37+
Please use the aggregated label directly instead. In this case, you should use 'a0_a2' instead of using
38+
a['a0', 'a2'] >> 'a0_a2'.
39+
2
40+
41+
One should use the label on the aggregated array instead:
42+
43+
>>> agg_arr['a0_a2']
44+
2
45+
1846

1947
Backward incompatible changes
2048
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

larray/core/axis.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,13 @@ def isscalar(k):
845845
and key.axis.name == self.name
846846
and key.name in self
847847
):
848+
msg = "Using a Group object which was used to create an aggregate to " \
849+
"target its aggregated label is deprecated. " \
850+
"Please use the aggregated label directly instead. " \
851+
f"In this case, you should use {key.name!r} instead of " \
852+
f"using {key!r}."
853+
# let us hope the stacklevel does not vary by codepath
854+
warnings.warn(msg, FutureWarning, stacklevel=7)
848855
return LGroup(key.name, None, self)
849856
# elif isinstance(key, str) and key in self:
850857
# TODO: this is an awful workaround to avoid the "processing" of string keys which exist as is in the axis
@@ -905,9 +912,23 @@ def index(self, key) -> Union[int, np.ndarray, slice]:
905912
# XXX: this is potentially very expensive if key.key is an array or list and should be tried as a last
906913
# resort
907914
potential_tick = _to_tick(key)
915+
908916
# avoid matching 0 against False or 0.0, note that None has object dtype and so always pass this test
909917
if self._is_key_type_compatible(potential_tick):
910-
return mapping[potential_tick]
918+
try:
919+
res_idx = mapping[potential_tick]
920+
if potential_tick != key.key:
921+
# only warn if no KeyError was raised (potential_tick is in mapping)
922+
msg = "Using a Group object which was used to create an aggregate to " \
923+
"target its aggregated label is deprecated. " \
924+
"Please use the aggregated label directly instead. " \
925+
f"In this case, you should use {potential_tick!r} instead of " \
926+
f"using {key!r}."
927+
# let us hope the stacklevel does not vary by codepath
928+
warnings.warn(msg, FutureWarning, stacklevel=8)
929+
return res_idx
930+
except KeyError:
931+
pass
911932
# we must catch TypeError because key might not be hashable (eg slice)
912933
# IndexError is for when mapping is an ndarray
913934
except (KeyError, TypeError, IndexError):

larray/tests/test_array.py

Lines changed: 55 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
read_hdf, read_csv, read_eurostat, read_excel, open_excel,
1717
from_lists, from_string, from_frame, from_series,
1818
zip_array_values, zip_array_items)
19-
from larray.core.axis import _to_ticks, _to_key
19+
from larray.core.axis import _to_ticks, _to_tick, _to_key
2020
from larray.util.misc import LHDFStore
2121

2222
# avoid flake8 errors
@@ -27,6 +27,17 @@
2727
# E201: whitespace after '['
2828
# E241: multiple spaces after ','
2929

30+
31+
GROUP_AS_AGGREGATED_LABEL_MSG_TEMPLATE = "Using a Group object which was used to create an aggregate to " \
32+
"target its aggregated label is deprecated. " \
33+
"Please use the aggregated label directly instead. " \
34+
"In this case, you should use {potential_tick!r} instead of " \
35+
"using {key!r}."
36+
37+
def group_as_aggregated_label_msg(key):
38+
return GROUP_AS_AGGREGATED_LABEL_MSG_TEMPLATE.format(potential_tick=_to_tick(key), key=key)
39+
40+
3041
# ================== #
3142
# Test Value Strings #
3243
# ================== #
@@ -2315,8 +2326,9 @@ def test_group_agg_on_group_agg_nokw(array):
23152326
def test_getitem_on_group_agg(array):
23162327
a, b, c, d = array.axes
23172328

2318-
# using a string
2329+
# using a string (b_group1 is a string key)
23192330
agg_arr = array.sum(a, c).sum(b=b_groups_all)
2331+
23202332
# the following are all equivalent
23212333
assert agg_arr[b_group1].shape == (6,)
23222334
assert agg_arr[(b_group1,)].shape == (6,)
@@ -2330,22 +2342,26 @@ def test_getitem_on_group_agg(array):
23302342
# using an anonymous LGroup
23312343
lg1 = b[b_group1]
23322344
agg_arr = array.sum(a, c).sum(b=(lg1, b_group2, b_group2, all_b))
2333-
# the following are all equivalent
2334-
assert agg_arr[lg1].shape == (6,)
2335-
assert agg_arr[(lg1,)].shape == (6,)
2336-
assert agg_arr[(lg1, slice(None))].shape == (6,)
2337-
assert agg_arr[lg1, slice(None)].shape == (6,)
2338-
assert agg_arr[lg1, :].shape == (6,)
2345+
with must_warn(FutureWarning, msg=group_as_aggregated_label_msg(lg1), num_expected=5):
2346+
# the following should all be equivalent
2347+
assert agg_arr[lg1].shape == (6,)
2348+
assert agg_arr[(lg1,)].shape == (6,)
2349+
# these last three are only syntactic sugar differences
2350+
# (__getitem__ receives the *exact* same key)
2351+
assert agg_arr[(lg1, slice(None))].shape == (6,)
2352+
assert agg_arr[lg1, slice(None)].shape == (6,)
2353+
assert agg_arr[lg1, :].shape == (6,)
23392354

23402355
# using a named LGroup
23412356
lg1 = b[b_group1] >> 'g1'
23422357
agg_arr = array.sum(a, c).sum(b=(lg1, b_group2, b_group2, all_b))
2343-
# the following are all equivalent
2344-
assert agg_arr[lg1].shape == (6,)
2345-
assert agg_arr[(lg1,)].shape == (6,)
2346-
assert agg_arr[(lg1, slice(None))].shape == (6,)
2347-
assert agg_arr[lg1, slice(None)].shape == (6,)
2348-
assert agg_arr[lg1, :].shape == (6,)
2358+
with must_warn(FutureWarning, msg=group_as_aggregated_label_msg(lg1), num_expected=5):
2359+
# the following are all equivalent
2360+
assert agg_arr[lg1].shape == (6,)
2361+
assert agg_arr[(lg1,)].shape == (6,)
2362+
assert agg_arr[(lg1, slice(None))].shape == (6,)
2363+
assert agg_arr[lg1, slice(None)].shape == (6,)
2364+
assert agg_arr[lg1, :].shape == (6,)
23492365

23502366

23512367
def test_getitem_on_group_agg_nokw(array):
@@ -2354,6 +2370,7 @@ def test_getitem_on_group_agg_nokw(array):
23542370
# using a string
23552371
agg_arr = array.sum(a, c).sum((b_group1, b_group2, b_group3, all_b))
23562372
# the following are all equivalent
2373+
# b_group1 is a string key
23572374
assert agg_arr[b_group1].shape == (6,)
23582375
assert agg_arr[(b_group1,)].shape == (6,)
23592376
assert agg_arr[(b_group1, slice(None))].shape == (6,)
@@ -2366,22 +2383,24 @@ def test_getitem_on_group_agg_nokw(array):
23662383
# using an anonymous LGroup
23672384
lg1 = b[b_group1]
23682385
agg_arr = array.sum(a, c).sum((lg1, b_group2, b_group3, all_b))
2369-
# the following are all equivalent
2370-
assert agg_arr[lg1].shape == (6,)
2371-
assert agg_arr[(lg1,)].shape == (6,)
2372-
assert agg_arr[(lg1, slice(None))].shape == (6,)
2373-
assert agg_arr[lg1, slice(None)].shape == (6,)
2374-
assert agg_arr[lg1, :].shape == (6,)
2386+
with must_warn(FutureWarning, msg=group_as_aggregated_label_msg(lg1), num_expected=5):
2387+
# the following are all equivalent
2388+
assert agg_arr[lg1].shape == (6,)
2389+
assert agg_arr[(lg1,)].shape == (6,)
2390+
assert agg_arr[(lg1, slice(None))].shape == (6,)
2391+
assert agg_arr[lg1, slice(None)].shape == (6,)
2392+
assert agg_arr[lg1, :].shape == (6,)
23752393

23762394
# using a named LGroup
23772395
lg1 = b[b_group1] >> 'g1'
23782396
agg_arr = array.sum(a, c).sum((lg1, b_group2, b_group3, all_b))
2379-
# the following are all equivalent
2380-
assert agg_arr[lg1].shape == (6,)
2381-
assert agg_arr[(lg1,)].shape == (6,)
2382-
assert agg_arr[(lg1, slice(None))].shape == (6,)
2383-
assert agg_arr[lg1, slice(None)].shape == (6,)
2384-
assert agg_arr[lg1, :].shape == (6,)
2397+
with must_warn(FutureWarning, msg=group_as_aggregated_label_msg(lg1), num_expected=5):
2398+
# the following are all equivalent
2399+
assert agg_arr[lg1].shape == (6,)
2400+
assert agg_arr[(lg1,)].shape == (6,)
2401+
assert agg_arr[(lg1, slice(None))].shape == (6,)
2402+
assert agg_arr[lg1, slice(None)].shape == (6,)
2403+
assert agg_arr[lg1, :].shape == (6,)
23852404

23862405

23872406
def test_filter_on_group_agg(array):
@@ -2395,7 +2414,8 @@ def test_filter_on_group_agg(array):
23952414
# using a named LGroup
23962415
g1 = b[b_group1] >> 'g1'
23972416
agg_arr = array.sum(a, c).sum(b=(g1, b_group2, b_group3, all_b))
2398-
assert agg_arr.filter(b=g1).shape == (6,)
2417+
with must_warn(FutureWarning, msg=group_as_aggregated_label_msg(g1)):
2418+
assert agg_arr.filter(b=g1).shape == (6,)
23992419

24002420
# Note that agg_arr.filter(b=(g1,)) does NOT work. It might be a
24012421
# little confusing for users, because agg_arr[(g1,)] works but it is
@@ -2418,7 +2438,8 @@ def test_filter_on_group_agg(array):
24182438
# assert bya.filter(a=':17').shape == (12, 2, 6)
24192439

24202440
bya = array.sum(a=(a0to5_named, 5, a6to13, a14plus))
2421-
assert bya.filter(a=a0to5_named).shape == (12, 2, 6)
2441+
with must_warn(FutureWarning, msg=group_as_aggregated_label_msg(a0to5_named)):
2442+
assert bya.filter(a=a0to5_named).shape == (12, 2, 6)
24222443

24232444

24242445
def test_sum_several_lg_groups(array):
@@ -2433,8 +2454,12 @@ def test_sum_several_lg_groups(array):
24332454

24342455
# the result is indexable
24352456
# 1.a) by LGroup
2436-
assert agg_arr.filter(b=lg1).shape == (19, 2, 6)
2437-
assert agg_arr.filter(b=(lg1, lg2)).shape == (19, 2, 2, 6)
2457+
msg1 = group_as_aggregated_label_msg(lg1)
2458+
with must_warn(FutureWarning, msg=msg1):
2459+
assert agg_arr.filter(b=lg1).shape == (19, 2, 6)
2460+
msg2 = group_as_aggregated_label_msg(lg2)
2461+
with must_warn(FutureWarning, msg=(msg1, msg2), check_file=False):
2462+
assert agg_arr.filter(b=(lg1, lg2)).shape == (19, 2, 2, 6)
24382463

24392464
# 1.b) by string (name of groups)
24402465
assert agg_arr.filter(b='lg1').shape == (19, 2, 6)

0 commit comments

Comments
 (0)