@@ -72,7 +72,12 @@ def generate_histogram(
7272
7373
7474def generate_alignment (
75- histogram , highest_order = 10 , lowest_order = 0 , threshold = 1_000_000 , drop_empty_siblings = False
75+ row_count_histogram ,
76+ highest_order = 10 ,
77+ lowest_order = 0 ,
78+ threshold = 1_000_000 ,
79+ drop_empty_siblings = False ,
80+ mem_size_histogram = None ,
7681):
7782 """Generate alignment from high order pixels to those of equal or lower order
7883
@@ -83,9 +88,9 @@ def generate_alignment(
8388
8489 Parameters
8590 ----------
86- histogram : np.array
87- one-dimensional numpy array of long integers where the
88- value at each index corresponds to the number of objects found at the healpix pixel.
91+ row_count_histogram : np.array
92+ one-dimensional numpy array of long integers where the value at each index corresponds to
93+ the number of objects found at the healpix pixel.
8994 highest_order : int
9095 the highest healpix order (e.g. 5-10) (Default value = 10)
9196 lowest_order : int
@@ -95,6 +100,10 @@ def generate_alignment(
95100 the maximum number of objects allowed in a single pixel (Default value = 1_000_000)
96101 drop_empty_siblings : bool
97102 if 3 of 4 pixels are empty, keep only the non-empty pixel (Default value = False)
103+ mem_size_histogram : np.array or None
104+ one-dimensional numpy array of long integers where the value at each index corresponds to
105+ the memory size (in bytes) of objects found at the healpix pixel. If provided, this will be
106+ used to determine the thresholding instead of the param `row_count_histogram`. (Default value = None)
98107
99108 Returns
100109 -------
@@ -113,23 +122,41 @@ def generate_alignment(
113122 if the histogram is the wrong size, or some initial histogram bins
114123 exceed threshold.
115124 """
116- _validate_alignment_arguments (histogram , highest_order , lowest_order , threshold )
125+ # Validate inputs.
126+ _validate_alignment_arguments (
127+ row_count_histogram , mem_size_histogram , highest_order , lowest_order , threshold
128+ )
117129
118- nested_sums = _get_nested_sums (histogram , highest_order , lowest_order )
130+ # Generate nested sums.
131+ nested_sums_row_count = _get_nested_sums (row_count_histogram , highest_order , lowest_order )
132+ if mem_size_histogram is not None :
133+ nested_sums_mem_size = _get_nested_sums (mem_size_histogram , highest_order , lowest_order )
134+ else :
135+ nested_sums_mem_size = None
119136
137+ # Generate alignment.
120138 if drop_empty_siblings :
121- return _get_alignment_dropping_siblings (nested_sums , highest_order , lowest_order , threshold )
122- return _get_alignment (nested_sums , highest_order , lowest_order , threshold )
139+ return _get_alignment_dropping_siblings (
140+ nested_sums_row_count , highest_order , lowest_order , threshold , nested_sums_mem_size
141+ )
142+ return _get_alignment (nested_sums_row_count , highest_order , lowest_order , threshold , nested_sums_mem_size )
123143
124144
def _validate_alignment_arguments(
    row_count_histogram, mem_size_histogram, highest_order, lowest_order, threshold
):
    """Check the arguments shared by the alignment generators.

    Parameters
    ----------
    row_count_histogram : np.array
        one-dimensional array of per-pixel row counts at ``highest_order``.
    mem_size_histogram : np.array or None
        one-dimensional array of per-pixel memory sizes at ``highest_order``.
        When provided, it is the histogram compared against ``threshold``
        instead of ``row_count_histogram``.
    highest_order : int
        the healpix order both histograms are expected to be binned at.
    lowest_order : int
        the lowest healpix order allowed; must not exceed ``highest_order``.
    threshold : int
        the largest value a single histogram bin may hold.

    Raises
    ------
    ValueError
        if either histogram does not have ``hp.order2npix(highest_order)``
        entries, if ``lowest_order`` is greater than ``highest_order``, or if
        a single bin of the thresholded histogram already exceeds ``threshold``.
    """
    expected_length = hp.order2npix(highest_order)
    if len(row_count_histogram) != expected_length:
        raise ValueError("histogram is not the right size")
    if lowest_order > highest_order:
        raise ValueError("lowest_order should be less than highest_order")
    # The mem_size histogram must line up pixel-for-pixel with the row counts.
    # This check sits after the pre-existing ones so that their errors keep
    # the same precedence as before.
    if mem_size_histogram is not None and len(mem_size_histogram) != expected_length:
        raise ValueError("mem_size_histogram is not the right size")

    # Only the histogram that drives the thresholding is compared to the threshold.
    if mem_size_histogram is not None:
        label, thresholded_histogram = "mem_size", mem_size_histogram
    else:
        label, thresholded_histogram = "row count", row_count_histogram
    max_bin = np.amax(thresholded_histogram)
    if max_bin > threshold:
        raise ValueError(f"single pixel {label} {max_bin} exceeds threshold {threshold}")
133160
134161
135162def _get_nested_sums (histogram , highest_order , lowest_order ):
@@ -150,16 +177,22 @@ def _get_nested_sums(histogram, highest_order, lowest_order):
150177 return nested_sums
151178
152179
153- def _get_alignment (nested_sums , highest_order , lowest_order , threshold ):
180+ def _get_alignment (nested_sums_row_count , highest_order , lowest_order , threshold , nested_sums_mem_size ):
154181 """Method to aggregate pixels up to the threshold.
155182
156183 Checks from low order (large areas), drilling down into higher orders (smaller areas) to
157184 find the appropriate order for an area of sky."""
185+ if nested_sums_mem_size is not None :
186+ nested_sums = nested_sums_mem_size
187+ else :
188+ nested_sums = nested_sums_row_count
189+
190+ # Initialize our alignment structure.
158191 nested_alignment = []
159192 for i in range (0 , highest_order + 1 ):
160193 nested_alignment .append (np .full (hp .order2npix (i ), None ))
161194
162- # work forward - determine if we should map to a lower order pixel, this pixel, or keep looking.
195+ # Work forward - determine if we should map to a lower order pixel, this pixel, or keep looking.
163196 for read_order in range (lowest_order , highest_order + 1 ):
164197 parent_order = read_order - 1
165198 for index in range (0 , len (nested_sums [read_order ])):
@@ -173,16 +206,40 @@ def _get_alignment(nested_sums, highest_order, lowest_order, threshold):
173206 elif nested_sums [read_order ][index ] == 0 :
174207 continue
175208 elif nested_sums [read_order ][index ] <= threshold :
176- nested_alignment [read_order ][index ] = (
177- read_order ,
178- index ,
179- nested_sums [read_order ][index ],
180- )
209+ # For row_count mode, use tuple of (order, pixel, row_count)
210+ if not nested_sums_mem_size :
211+ nested_alignment [read_order ][index ] = (
212+ read_order ,
213+ index ,
214+ nested_sums [read_order ][index ],
215+ )
216+ # For mem_size mode, use tuple of (order, pixel, row_count, mem_size)
217+ else :
218+ mem_size = nested_sums_mem_size [read_order ][index ]
219+ row_count = nested_sums_row_count [read_order ][index ]
220+ nested_alignment [read_order ][index ] = (
221+ read_order ,
222+ index ,
223+ row_count ,
224+ mem_size ,
225+ )
226+
227+ # We no longer need to store mem_size sums once the alignment has been constructed.
228+ if nested_sums_mem_size is not None :
229+ nested_alignment [highest_order ] = np .array (
230+ [
231+ pixel_alignment [:3 ] if pixel_alignment else None
232+ for pixel_alignment in nested_alignment [highest_order ]
233+ ],
234+ dtype = "object" ,
235+ )
181236
182237 return nested_alignment [highest_order ]
183238
184239
185- def _get_alignment_dropping_siblings (nested_sums , highest_order , lowest_order , threshold ):
240+ def _get_alignment_dropping_siblings (
241+ nested_sum_row_count , highest_order , lowest_order , threshold , nested_sums_mem_size
242+ ):
186243 """Method to aggregate pixels up to the threshold that collapses completely empty pixels away.
187244
188245 Checks from higher order (smaller areas) out to lower order (large areas). In this way, we are able to
@@ -209,6 +266,15 @@ def _get_alignment_dropping_siblings(nested_sums, highest_order, lowest_order, t
209266 - total number in cell is greater than the threshold
210267 - only one subcell contains values
211268 """
269+ # If nested_sums_mem_size is provided, we're in mem_size mode (and thresholding by memory size).
270+ # This means we'll want to use the mem_size sums to generate our alignment, but still keep track
271+ # of the row counts for the output.
272+ if nested_sums_mem_size is not None :
273+ nested_sums = nested_sums_mem_size
274+ else :
275+ nested_sums = nested_sum_row_count
276+
277+ # Initialize our order map to the highest order.
212278 order_map = np .array (
213279 [highest_order if count > 0 else - 1 for count in nested_sums [highest_order ]], dtype = np .int32
214280 )
@@ -237,19 +303,22 @@ def _get_alignment_dropping_siblings(nested_sums, highest_order, lowest_order, t
237303 )
238304 for pixel_high_index , intended_order in enumerate (order_map )
239305 ]
306+ # In both row_count and mem_size mode, use tuple of (order, pixel, row_count)
307+ # as mem_size is no longer needed now that alignment has been constructed.
240308 nested_alignment = [
241- (tup [0 ], tup [1 ], nested_sums [tup [0 ]][tup [1 ]]) if tup else None for tup in nested_alignment
309+ (tup [0 ], tup [1 ], nested_sum_row_count [tup [0 ]][tup [1 ]]) if tup else None for tup in nested_alignment
242310 ]
243311
244312 return np .array (nested_alignment , dtype = "object" )
245313
246314
247315def generate_incremental_alignment (
248- histogram : np .ndarray ,
316+ row_count_histogram : np .ndarray ,
249317 existing_pixels : Sequence [tuple [int , int ]],
250318 highest_order : int = 10 ,
251319 lowest_order : int = 0 ,
252320 threshold : int = 1_000_000 ,
321+ mem_size_histogram : np .ndarray | None = None ,
253322):
254323 """Generate alignment for an incremental catalog.
255324
@@ -262,7 +331,7 @@ def generate_incremental_alignment(
262331
263332 Parameters
264333 ----------
265- histogram : np.ndarray
334+ row_count_histogram : np.ndarray
266335 one-dimensional numpy array of long integers where the
267336 value at each index corresponds to the number of objects
268337 found at the healpix pixel.
@@ -275,6 +344,10 @@ def generate_incremental_alignment(
275344 constrains the partitioning to prevent spatially large pixels. (Default value = 0)
276345 threshold : int
277346 the maximum number of objects allowed in a single pixel (Default value = 1_000_000)
347+ mem_size_histogram : np.ndarray or None
348+ one-dimensional numpy array of long integers where the value at each index corresponds to
349+ the memory size (in bytes) of objects found at the healpix pixel. If provided, this will be
350+ used to determine the thresholding instead of the param `row_count_histogram`. (Default value = None)
278351
279352 Returns
280353 -------
@@ -287,9 +360,11 @@ def generate_incremental_alignment(
287360 - pixel number *at the above order*
288361 - the number of objects in the pixel
289362 """
290- _validate_alignment_arguments (histogram , highest_order , lowest_order , threshold )
363+ _validate_alignment_arguments (
364+ row_count_histogram , mem_size_histogram , highest_order , lowest_order , threshold
365+ )
291366
292- nested_sums = _get_nested_sums (histogram , highest_order , lowest_order )
367+ nested_sums = _get_nested_sums (row_count_histogram , highest_order , lowest_order )
293368
294369 tree = PixelTree .from_healpix (existing_pixels )
295370 if tree .tree_order > highest_order :
0 commit comments