Skip to content

Commit f1d5d29

Browse files
authored
Merge 9ac5477 into 53a04bc
2 parents 53a04bc + 9ac5477 commit f1d5d29

File tree

2 files changed

+257
-26
lines changed

2 files changed

+257
-26
lines changed

src/hats/pixel_math/partition_stats.py

Lines changed: 101 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,12 @@ def generate_histogram(
7272

7373

7474
def generate_alignment(
75-
histogram, highest_order=10, lowest_order=0, threshold=1_000_000, drop_empty_siblings=False
75+
row_count_histogram,
76+
highest_order=10,
77+
lowest_order=0,
78+
threshold=1_000_000,
79+
drop_empty_siblings=False,
80+
mem_size_histogram=None,
7681
):
7782
"""Generate alignment from high order pixels to those of equal or lower order
7883
@@ -83,9 +88,9 @@ def generate_alignment(
8388
8489
Parameters
8590
----------
86-
histogram : np.array
87-
one-dimensional numpy array of long integers where the
88-
value at each index corresponds to the number of objects found at the healpix pixel.
91+
row_count_histogram : np.array
92+
one-dimensional numpy array of long integers where the value at each index corresponds to
93+
the number of objects found at the healpix pixel.
8994
highest_order : int
9095
the highest healpix order (e.g. 5-10) (Default value = 10)
9196
lowest_order : int
@@ -95,6 +100,10 @@ def generate_alignment(
95100
the maximum number of objects (or bytes, when `mem_size_histogram` is provided) allowed in a single pixel (Default value = 1_000_000)
96101
drop_empty_siblings : bool
97102
if 3 of 4 pixels are empty, keep only the non-empty pixel (Default value = False)
103+
mem_size_histogram : np.array or None
104+
one-dimensional numpy array of long integers where the value at each index corresponds to
105+
the memory size (in bytes) of objects found at the healpix pixel. If provided, this will be
106+
used to determine the thresholding instead of `row_count_histogram`. (Default value = None)
98107
99108
Returns
100109
-------
@@ -113,23 +122,41 @@ def generate_alignment(
113122
if the histogram is the wrong size, or some initial histogram bins
114123
exceed threshold.
115124
"""
116-
_validate_alignment_arguments(histogram, highest_order, lowest_order, threshold)
125+
# Validate inputs.
126+
_validate_alignment_arguments(
127+
row_count_histogram, mem_size_histogram, highest_order, lowest_order, threshold
128+
)
117129

118-
nested_sums = _get_nested_sums(histogram, highest_order, lowest_order)
130+
# Generate nested sums.
131+
nested_sums_row_count = _get_nested_sums(row_count_histogram, highest_order, lowest_order)
132+
if mem_size_histogram is not None:
133+
nested_sums_mem_size = _get_nested_sums(mem_size_histogram, highest_order, lowest_order)
134+
else:
135+
nested_sums_mem_size = None
119136

137+
# Generate alignment.
120138
if drop_empty_siblings:
121-
return _get_alignment_dropping_siblings(nested_sums, highest_order, lowest_order, threshold)
122-
return _get_alignment(nested_sums, highest_order, lowest_order, threshold)
139+
return _get_alignment_dropping_siblings(
140+
nested_sums_row_count, highest_order, lowest_order, threshold, nested_sums_mem_size
141+
)
142+
return _get_alignment(nested_sums_row_count, highest_order, lowest_order, threshold, nested_sums_mem_size)
123143

124144

125-
def _validate_alignment_arguments(
    row_count_histogram, mem_size_histogram, highest_order, lowest_order, threshold
):
    """Check the inputs shared by the alignment generators before any work is done.

    Parameters
    ----------
    row_count_histogram : np.array
        per-pixel row counts; must have one bin per healpix pixel at ``highest_order``.
    mem_size_histogram : np.array or None
        per-pixel memory sizes. When provided, it must be the same length as
        ``row_count_histogram``, and it (rather than the row counts) is compared
        against ``threshold``.
    highest_order : int
        healpix order of the histogram bins.
    lowest_order : int
        lowest healpix order allowed in the alignment.
    threshold : int
        largest value allowed in a single bin of the thresholded histogram.

    Raises
    ------
    ValueError
        if either histogram is the wrong size, if ``lowest_order`` exceeds
        ``highest_order``, or if a single bin already exceeds ``threshold``.
    """
    expected_size = hp.order2npix(highest_order)
    if len(row_count_histogram) != expected_size:
        raise ValueError("histogram is not the right size")
    # Both histograms are indexed by the same pixel number when the alignment is
    # built, so a mismatched mem_size histogram must be rejected up front.
    if mem_size_histogram is not None and len(mem_size_histogram) != expected_size:
        raise ValueError("mem_size_histogram is not the right size")
    if lowest_order > highest_order:
        raise ValueError("lowest_order should be less than highest_order")

    # Only the histogram that drives the thresholding needs its bins checked.
    if mem_size_histogram is not None:
        max_bin = np.amax(mem_size_histogram)
        if max_bin > threshold:
            raise ValueError(f"single pixel mem_size {max_bin} exceeds threshold {threshold}")
    else:
        max_bin = np.amax(row_count_histogram)
        if max_bin > threshold:
            raise ValueError(f"single pixel row count {max_bin} exceeds threshold {threshold}")
133160

134161

135162
def _get_nested_sums(histogram, highest_order, lowest_order):
@@ -150,16 +177,22 @@ def _get_nested_sums(histogram, highest_order, lowest_order):
150177
return nested_sums
151178

152179

153-
def _get_alignment(nested_sums, highest_order, lowest_order, threshold):
180+
def _get_alignment(nested_sums_row_count, highest_order, lowest_order, threshold, nested_sums_mem_size):
154181
"""Method to aggregate pixels up to the threshold.
155182
156183
Checks from low order (large areas), drilling down into higher orders (smaller areas) to
157184
find the appropriate order for an area of sky."""
185+
if nested_sums_mem_size is not None:
186+
nested_sums = nested_sums_mem_size
187+
else:
188+
nested_sums = nested_sums_row_count
189+
190+
# Initialize our alignment structure.
158191
nested_alignment = []
159192
for i in range(0, highest_order + 1):
160193
nested_alignment.append(np.full(hp.order2npix(i), None))
161194

162-
# work forward - determine if we should map to a lower order pixel, this pixel, or keep looking.
195+
# Work forward - determine if we should map to a lower order pixel, this pixel, or keep looking.
163196
for read_order in range(lowest_order, highest_order + 1):
164197
parent_order = read_order - 1
165198
for index in range(0, len(nested_sums[read_order])):
@@ -173,16 +206,40 @@ def _get_alignment(nested_sums, highest_order, lowest_order, threshold):
173206
elif nested_sums[read_order][index] == 0:
174207
continue
175208
elif nested_sums[read_order][index] <= threshold:
176-
nested_alignment[read_order][index] = (
177-
read_order,
178-
index,
179-
nested_sums[read_order][index],
180-
)
209+
# For row_count mode, use tuple of (order, pixel, row_count)
210+
if not nested_sums_mem_size:
211+
nested_alignment[read_order][index] = (
212+
read_order,
213+
index,
214+
nested_sums[read_order][index],
215+
)
216+
# For mem_size mode, use tuple of (order, pixel, row_count, mem_size)
217+
else:
218+
mem_size = nested_sums_mem_size[read_order][index]
219+
row_count = nested_sums_row_count[read_order][index]
220+
nested_alignment[read_order][index] = (
221+
read_order,
222+
index,
223+
row_count,
224+
mem_size,
225+
)
226+
227+
# We no longer need to store mem_size sums once the alignment has been constructed.
228+
if nested_sums_mem_size is not None:
229+
nested_alignment[highest_order] = np.array(
230+
[
231+
pixel_alignment[:3] if pixel_alignment else None
232+
for pixel_alignment in nested_alignment[highest_order]
233+
],
234+
dtype="object",
235+
)
181236

182237
return nested_alignment[highest_order]
183238

184239

185-
def _get_alignment_dropping_siblings(nested_sums, highest_order, lowest_order, threshold):
240+
def _get_alignment_dropping_siblings(
241+
nested_sum_row_count, highest_order, lowest_order, threshold, nested_sums_mem_size
242+
):
186243
"""Method to aggregate pixels up to the threshold that collapses completely empty pixels away.
187244
188245
Checks from higher order (smaller areas) out to lower order (large areas). In this way, we are able to
@@ -209,6 +266,15 @@ def _get_alignment_dropping_siblings(nested_sums, highest_order, lowest_order, t
209266
- total number in cell is greater than the threshold
210267
- only one subcell contains values
211268
"""
269+
# If nested_sums_mem_size is provided, we're in mem_size mode (and thresholding by memory size).
270+
# This means we'll want to use the mem_size sums to generate our alignment, but still keep track
271+
# of the row counts for the output.
272+
if nested_sums_mem_size is not None:
273+
nested_sums = nested_sums_mem_size
274+
else:
275+
nested_sums = nested_sum_row_count
276+
277+
# Initialize our order map to the highest order.
212278
order_map = np.array(
213279
[highest_order if count > 0 else -1 for count in nested_sums[highest_order]], dtype=np.int32
214280
)
@@ -237,19 +303,22 @@ def _get_alignment_dropping_siblings(nested_sums, highest_order, lowest_order, t
237303
)
238304
for pixel_high_index, intended_order in enumerate(order_map)
239305
]
306+
# In both row_count and mem_size mode, use tuple of (order, pixel, row_count)
307+
# as mem_size is no longer needed now that alignment has been constructed.
240308
nested_alignment = [
241-
(tup[0], tup[1], nested_sums[tup[0]][tup[1]]) if tup else None for tup in nested_alignment
309+
(tup[0], tup[1], nested_sum_row_count[tup[0]][tup[1]]) if tup else None for tup in nested_alignment
242310
]
243311

244312
return np.array(nested_alignment, dtype="object")
245313

246314

247315
def generate_incremental_alignment(
248-
histogram: np.ndarray,
316+
row_count_histogram: np.ndarray,
249317
existing_pixels: Sequence[tuple[int, int]],
250318
highest_order: int = 10,
251319
lowest_order: int = 0,
252320
threshold: int = 1_000_000,
321+
mem_size_histogram: np.ndarray | None = None,
253322
):
254323
"""Generate alignment for an incremental catalog.
255324
@@ -262,7 +331,7 @@ def generate_incremental_alignment(
262331
263332
Parameters
264333
----------
265-
histogram : np.ndarray
334+
row_count_histogram : np.ndarray
266335
one-dimensional numpy array of long integers where the
267336
value at each index corresponds to the number of objects
268337
found at the healpix pixel.
@@ -275,6 +344,10 @@ def generate_incremental_alignment(
275344
constrains the partitioning to prevent spatially large pixels. (Default value = 0)
276345
threshold : int
277346
the maximum number of objects allowed in a single pixel (Default value = 1_000_000)
347+
mem_size_histogram : np.ndarray or None
348+
one-dimensional numpy array of long integers where the value at each index corresponds to
349+
the memory size (in bytes) of objects found at the healpix pixel. If provided, this will be
350+
used to determine the thresholding instead of `row_count_histogram`. (Default value = None)
278351
279352
Returns
280353
-------
@@ -287,9 +360,11 @@ def generate_incremental_alignment(
287360
- pixel number *at the above order*
288361
- the number of objects in the pixel
289362
"""
290-
_validate_alignment_arguments(histogram, highest_order, lowest_order, threshold)
363+
_validate_alignment_arguments(
364+
row_count_histogram, mem_size_histogram, highest_order, lowest_order, threshold
365+
)
291366

292-
nested_sums = _get_nested_sums(histogram, highest_order, lowest_order)
367+
nested_sums = _get_nested_sums(row_count_histogram, highest_order, lowest_order)
293368

294369
tree = PixelTree.from_healpix(existing_pixels)
295370
if tree.tree_order > highest_order:

tests/hats/pixel_math/test_partition_stats.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,3 +255,159 @@ def test_incremental_alignment_highest_order_invalid():
255255
hist.generate_incremental_alignment(
256256
hist.empty_histogram(0), existing_pixels=[(1, 45)], highest_order=0
257257
)
258+
259+
260+
def test_generate_alignment_mem_size():
261+
"""Create alignment based on memory size histogram"""
262+
initial_row_count_histogram = hist.empty_histogram(2)
263+
filled_pixels = [11_000, 11, 14, 13, 5, 7, 8, 9, 11, 23, 4, 4, 17, 0, 1, 0]
264+
initial_row_count_histogram[176:] = filled_pixels[:]
265+
266+
initial_mem_size_histogram = hist.empty_histogram(2)
267+
filled_mem_sizes = [
268+
4_000,
269+
1_000,
270+
1_000,
271+
1_000,
272+
5_000,
273+
7_000,
274+
8_000,
275+
9_000,
276+
1_000,
277+
2_000,
278+
4_000,
279+
4_000,
280+
7_000,
281+
0,
282+
1_000,
283+
0,
284+
]
285+
initial_mem_size_histogram[176:] = filled_mem_sizes[:]
286+
287+
# Generate alignment based on memory size thresholding.
288+
result = hist.generate_alignment(
289+
initial_row_count_histogram,
290+
highest_order=2,
291+
threshold=10_000,
292+
mem_size_histogram=initial_mem_size_histogram,
293+
)
294+
295+
expected = np.full(hp.order2npix(2), None)
296+
tuples = [
297+
(1, 44, 11038),
298+
(1, 44, 11038),
299+
(1, 44, 11038),
300+
(1, 44, 11038),
301+
(2, 180, 5),
302+
(2, 181, 7),
303+
(2, 182, 8),
304+
(2, 183, 9),
305+
(2, 184, 11),
306+
(2, 185, 23),
307+
(2, 186, 4),
308+
(2, 187, 4),
309+
(1, 47, 18),
310+
(1, 47, 18),
311+
(1, 47, 18),
312+
(1, 47, 18),
313+
]
314+
expected[176:192] = tuples
315+
316+
npt.assert_array_equal(result, expected)
317+
318+
319+
def test_generate_alignment_mem_size_dropping_siblings():
320+
"""Create alignment based on memory size histogram, dropping empty sibling pixels"""
321+
initial_row_count_histogram = hist.empty_histogram(2)
322+
filled_pixels = [0, 0, 14, 0, 5, 7, 8, 9, 11, 23, 4, 4, 17, 0, 1, 0]
323+
initial_row_count_histogram[176:] = filled_pixels[:]
324+
325+
initial_mem_size_histogram = hist.empty_histogram(2)
326+
filled_mem_sizes = [
327+
0,
328+
0,
329+
4_000,
330+
0,
331+
5_000,
332+
7_000,
333+
8_000,
334+
9_000,
335+
1_000,
336+
2_000,
337+
4_000,
338+
4_000,
339+
7_000,
340+
0,
341+
1_000,
342+
0,
343+
]
344+
initial_mem_size_histogram[176:] = filled_mem_sizes[:]
345+
346+
# Generate alignment based on memory size thresholding.
347+
result = hist.generate_alignment(
348+
initial_row_count_histogram,
349+
highest_order=2,
350+
threshold=10_000,
351+
drop_empty_siblings=True,
352+
mem_size_histogram=initial_mem_size_histogram,
353+
)
354+
355+
expected = np.full(hp.order2npix(2), None)
356+
tuples = [
357+
None,
358+
None,
359+
(2, 178, 14),
360+
None,
361+
(2, 180, 5),
362+
(2, 181, 7),
363+
(2, 182, 8),
364+
(2, 183, 9),
365+
(2, 184, 11),
366+
(2, 185, 23),
367+
(2, 186, 4),
368+
(2, 187, 4),
369+
(1, 47, 18),
370+
(1, 47, 18),
371+
(1, 47, 18),
372+
(1, 47, 18),
373+
]
374+
expected[176:192] = tuples
375+
376+
npt.assert_array_equal(result, expected)
377+
378+
379+
def test_generate_alignment_mem_size_exceeds_threshold():
380+
"""Raise an error when a single pixel's memory size exceeds the threshold"""
381+
initial_row_count_histogram = hist.empty_histogram(2)
382+
filled_pixels = [4, 11, 14, 13, 5, 7, 8, 9, 11, 23, 4, 4, 17, 0, 1, 0]
383+
initial_row_count_histogram[176:] = filled_pixels[:]
384+
385+
initial_mem_size_histogram = hist.empty_histogram(2)
386+
filled_mem_sizes = [
387+
40_000,
388+
1_000,
389+
1_000,
390+
1_000,
391+
5_000,
392+
7_000,
393+
8_000,
394+
9_000,
395+
1_000,
396+
2_000,
397+
4_000,
398+
4_000,
399+
7_000,
400+
0,
401+
1_000,
402+
0,
403+
]
404+
initial_mem_size_histogram[176:] = filled_mem_sizes[:]
405+
406+
# We raise an error if any pixel exceeds the threshold.
407+
with pytest.raises(ValueError, match="exceeds threshold"):
408+
hist.generate_alignment(
409+
initial_row_count_histogram,
410+
highest_order=2,
411+
threshold=5_000,
412+
mem_size_histogram=initial_mem_size_histogram,
413+
)

0 commit comments

Comments
 (0)