|
13 | 13 | PartialChunkIterator,
|
14 | 14 | )
|
15 | 15 |
|
| 16 | +from zarr.tests.util import CountingDict |
| 17 | + |
16 | 18 |
|
17 | 19 | def test_normalize_integer_selection():
|
18 | 20 |
|
@@ -1451,3 +1453,75 @@ def test_numpy_int_indexing():
|
1451 | 1453 | z[:] = a
|
1452 | 1454 | assert a[42] == z[42]
|
1453 | 1455 | assert a[numpy.int64(42)] == z[numpy.int64(42)]
|
| 1456 | + |
| 1457 | + |
@pytest.mark.parametrize(
    "shape, chunks, ops",
    [
        # 1D test cases
        ((1070,), (50,), [("__getitem__", (slice(200, 400),))]),
        ((1070,), (50,), [("__getitem__", (slice(200, 400, 100),))]),
        ((1070,), (50,), [
            ("__getitem__", (slice(200, 400),)),
            ("__setitem__", (slice(200, 400, 100),)),
        ]),
        # 2D test cases
        ((40, 50), (5, 8), [
            ("__getitem__", (slice(6, 37, 13), slice(4, 10))),
            ("__setitem__", (slice(None), slice(None))),
        ]),
    ]
)
def test_accessed_chunks(shape, chunks, ops):
    """Check that only the chunks overlapping a selection are accessed.

    Parametrized arguments:
        shape: array shape (one entry per dimension).
        chunks: chunk size per dimension.
        ops: list of ``(optype, selection)`` tuples, where ``optype`` is
            ``"__getitem__"`` or ``"__setitem__"`` and ``selection`` is a
            tuple of slices whose length matches the number of dimensions.
    """
    import itertools

    # Use a counting dict as the backing store so we can track item accesses.
    store = CountingDict()
    z = zarr.create(shape=shape, chunks=chunks, store=store)

    for ii, (optype, slices) in enumerate(ops):

        # Resolve the slices into the chunk indices touched in each dimension.
        chunks_per_dim = []
        for N, C, sl in zip(shape, chunks, slices):
            chunk_ind = np.arange(N, dtype=int)[sl] // C
            chunks_per_dim.append(np.unique(chunk_ind))

        # The cartesian product of the per-dimension chunk indices gives the
        # store keys ("i.j....") that the operation is expected to access.
        chunks_accessed = [
            ".".join(str(ci) for ci in comb)
            for comb in itertools.product(*chunks_per_dim)
        ]

        counts_before = store.counter.copy()

        # Perform the operation
        if optype == "__getitem__":
            z[slices]
        else:
            z[slices] = ii

        # Isolate the access counts caused by this operation alone.
        delta_counts = store.counter - counts_before

        # Every expected chunk must have been accessed exactly once by the
        # operation under test.
        for ci in chunks_accessed:
            assert delta_counts.pop((optype, ci)) == 1

            # If the chunk was partially written to it will also have been
            # read once.  We don't determine whether the chunk actually was
            # partial here, only that the counts are consistent with that
            # having happened.
            if optype == "__setitem__":
                assert (
                    ("__getitem__", ci) not in delta_counts
                    or delta_counts.pop(("__getitem__", ci)) == 1
                )
        # Check that no other chunks were accessed.
        assert len(delta_counts) == 0
0 commit comments