|
1 | 1 | import numpy as np
|
2 | 2 | import pandas as pd
|
3 | 3 | import pyBigWig
|
| 4 | +import pytest |
4 | 5 |
|
5 | 6 | from bigwig_loader import config
|
6 | 7 | from bigwig_loader.collection import BigWigCollection
|
@@ -77,3 +78,46 @@ def test_same_output_with_nans(bigwig_path):
|
77 | 78 | print(this_batch[pybigwig_batch != this_batch])
|
78 | 79 | print(pybigwig_batch[pybigwig_batch != this_batch])
|
79 | 80 | assert np.allclose(pybigwig_batch, this_batch, equal_nan=True)
|
| 81 | + |
| 82 | + |
| 83 | +@pytest.mark.parametrize("window_size", [2, 11, 128]) |
| 84 | +@pytest.mark.parametrize("default_value", [0.0, np.nan, 2.0, 5.6, 10]) |
| 85 | +@pytest.mark.parametrize("sequence_length", [1000, 2048]) |
| 86 | +def test_windowed_output_against_pybigwig( |
| 87 | + bigwig_path, window_size, default_value, sequence_length |
| 88 | +): |
| 89 | + print("window_size:", window_size) |
| 90 | + pybigwig_collection = PyBigWigCollection(bigwig_path, first_n_files=3) |
| 91 | + collection = BigWigCollection(bigwig_path, first_n_files=3) |
| 92 | + |
| 93 | + df = pd.read_csv(config.example_positions, sep="\t") |
| 94 | + df = df[df["chr"].isin(collection.get_chromosomes_present_in_all_files())] |
| 95 | + |
| 96 | + chromosomes = list(df["chr"]) |
| 97 | + starts = list(df["center"] - sequence_length // 2) |
| 98 | + ends = [position + sequence_length for position in starts] |
| 99 | + |
| 100 | + pybigwig_batch = pybigwig_collection.get_batch(chromosomes, starts, ends) |
| 101 | + |
| 102 | + this_batch = collection.get_batch( |
| 103 | + chromosomes, starts, ends, default_value=default_value, window_size=window_size |
| 104 | + ).get() |
| 105 | + |
| 106 | + # Reshape the tensor so the last dimension contains |
| 107 | + # all the values corresponding to one window |
| 108 | + reduced_dim = sequence_length // window_size |
| 109 | + pybigwig_batch = pybigwig_batch[:, :, : reduced_dim * window_size] |
| 110 | + pybigwig_batch = pybigwig_batch.reshape( |
| 111 | + pybigwig_batch.shape[0], pybigwig_batch.shape[1], reduced_dim, window_size |
| 112 | + ) |
| 113 | + |
| 114 | + # fill nan's with the chosen default value |
| 115 | + pybigwig_batch = np.nan_to_num(pybigwig_batch, copy=False, nan=default_value) |
| 116 | + # And take mean over the window |
| 117 | + pybigwig_batch = np.nanmean(pybigwig_batch, axis=-1) |
| 118 | + |
| 119 | + print("PyBigWig (with window function applied afterwards):") |
| 120 | + print(pybigwig_batch) |
| 121 | + print("bigwig-loader:") |
| 122 | + print(this_batch) |
| 123 | + assert np.allclose(pybigwig_batch, this_batch, equal_nan=True) |
0 commit comments