|
7 | 7 | import logging |
8 | 8 | import os |
9 | 9 | import pickle |
| 10 | + |
| 11 | +from pyarrow import feather |
| 12 | +from pyarrow import Table |
10 | 13 | from io import StringIO |
11 | 14 |
|
12 | 15 | import numpy as np |
|
23 | 26 | try: |
24 | 27 | import libstempo as t2 |
25 | 28 | except ImportError: |
26 | | - logger.warning("libstempo not installed. Will use PINT instead.") # pragma: no cover |
| 29 | + logger.warning( |
| 30 | + "libstempo not installed. PINT or libstempo are required to use par and tim files." |
| 31 | + ) # pragma: no cover |
27 | 32 | t2 = None |
28 | 33 |
|
29 | 34 | try: |
|
32 | 37 | from pint.residuals import Residuals as resids |
33 | 38 | from pint.toa import TOAs |
34 | 39 | except ImportError: |
35 | | - logger.warning("PINT not installed. Will use libstempo instead.") # pragma: no cover |
| 40 | + logger.warning("PINT not installed. PINT or libstempo are required to use par and tim files.") # pragma: no cover |
36 | 41 | pint = None |
37 | 42 |
|
38 | 43 | try: |
|
42 | 47 | const = None |
43 | 48 | u = None |
44 | 49 |
|
45 | | -if pint is None and t2 is None: |
46 | | - err_msg = "Must have either PINT or libstempo timing package installed" |
47 | | - raise ImportError(err_msg) |
48 | | - |
49 | 50 |
|
50 | 51 | def get_maxobs(timfile): |
51 | 52 | """Utility function to return number of lines in tim file. |
@@ -161,6 +162,9 @@ def filter_data(self, start_time=None, end_time=None): |
161 | 162 |
|
162 | 163 | self.sort_data() |
163 | 164 |
|
| 165 | + def to_feather(self, filename, noisedict=None): |
| 166 | + FeatherPulsar.save_feather(self, filename, noisedict=noisedict) |
| 167 | + |
164 | 168 | def drop_not_picklable(self): |
165 | 169 | """Drop all attributes that cannot be pickled. |
166 | 170 |
|
@@ -421,6 +425,8 @@ def _set_dm(self, model): |
421 | 425 |
|
422 | 426 | if dmx: |
423 | 427 | self._dmx = dmx |
| 428 | + else: |
| 429 | + self._dmx = None |
424 | 430 |
|
425 | 431 | def _get_radec(self, model): |
426 | 432 | if hasattr(model, "RAJ") and hasattr(model, "DECJ"): |
@@ -565,6 +571,8 @@ def _set_dm(self, t2pulsar): |
565 | 571 |
|
566 | 572 | if dmx: |
567 | 573 | self._dmx = dmx |
| 574 | + else: |
| 575 | + self._dmx = None |
568 | 576 |
|
569 | 577 | def _get_radec(self, t2pulsar): |
570 | 578 | if "RAJ" in np.concatenate((t2pulsar.pars(which="fit"), t2pulsar.pars(which="set"))): |
@@ -655,7 +663,120 @@ def destroy(psr): # pragma: py-lt-38 |
655 | 663 | psr._deflated = "destroyed" |
656 | 664 |
|
657 | 665 |
|
| 666 | +class FeatherPulsar: |
| 667 | + columns = ["toas", "stoas", "toaerrs", "residuals", "freqs", "backend_flags", "telescope"] |
| 668 | + vector_columns = ["Mmat", "sunssb", "pos_t"] |
| 669 | + tensor_columns = ["planetssb"] |
| 670 | + # flags are done separately |
| 671 | + metadata = ["name", "dm", "dmx", "pdist", "pos", "phi", "theta"] |
| 672 | + # notes: currently ignores _isort/__isort and gets sorted versions |
| 673 | + |
| 674 | + def __init__(self): |
| 675 | + pass |
| 676 | + |
| 677 | + def __str__(self): |
| 678 | + return f"<Pulsar {self.name}: {len(self.residuals)} res, {self.Mmat.shape[1]} pars>" |
| 679 | + |
| 680 | + def __repr__(self): |
| 681 | + return str(self) |
| 682 | + |
| 683 | + def sort_data(self): |
| 684 | + """Sort data by time. This function is defined so that tests will pass.""" |
| 685 | + self._isort = np.argsort(self.toas, kind="mergesort") |
| 686 | + self._iisort = np.zeros(len(self._isort), dtype=int) |
| 687 | + for ii, p in enumerate(self._isort): |
| 688 | + self._iisort[p] = ii |
| 689 | + |
| 690 | + @classmethod |
| 691 | + def read_feather(cls, filename): |
| 692 | + f = feather.read_table(filename) |
| 693 | + self = FeatherPulsar() |
| 694 | + |
| 695 | + for array in FeatherPulsar.columns: |
| 696 | + if array in f.column_names: |
| 697 | + setattr(self, array, f[array].to_numpy()) |
| 698 | + |
| 699 | + for array in FeatherPulsar.vector_columns: |
| 700 | + cols = [c for c in f.column_names if c.startswith(array)] |
| 701 | + setattr(self, array, np.array([f[col].to_numpy() for col in cols]).swapaxes(0, 1).copy()) |
| 702 | + |
| 703 | + for array in FeatherPulsar.tensor_columns: |
| 704 | + rows = sorted(set(["_".join(c.split("_")[:-1]) for c in f.column_names if c.startswith(array)])) |
| 705 | + cols = [[c for c in f.column_names if c.startswith(row)] for row in rows] |
| 706 | + setattr( |
| 707 | + self, |
| 708 | + array, |
| 709 | + np.array([[f[col].to_numpy() for col in row] for row in cols]).swapaxes(0, 2).swapaxes(1, 2).copy(), |
| 710 | + ) |
| 711 | + |
| 712 | + self.flags = {} |
| 713 | + for array in [c for c in f.column_names if c.startswith("flags_")]: |
| 714 | + self.flags["_".join(array.split("_")[1:])] = f[array].to_numpy().astype("U") |
| 715 | + |
| 716 | + meta = json.loads(f.schema.metadata[b"json"]) |
| 717 | + for attr in FeatherPulsar.metadata: |
| 718 | + if attr in meta: |
| 719 | + setattr(self, attr, meta[attr]) |
| 720 | + else: |
| 721 | + print(f"Pulsar.read_feather: cannot find {attr} in feather file {filename}.") |
| 722 | + |
| 723 | + if "noisedict" in meta: |
| 724 | + setattr(self, "noisedict", meta["noisedict"]) |
| 725 | + |
| 726 | + self.sort_data() |
| 727 | + |
| 728 | + return self |
| 729 | + |
| 730 | + def to_list(a): |
| 731 | + return a.tolist() if isinstance(a, np.ndarray) else a |
| 732 | + |
| 733 | + def save_feather(self, filename, noisedict=None): |
| 734 | + self._toas = self._toas.astype(float) |
| 735 | + pydict = {array: getattr(self, array) for array in FeatherPulsar.columns} |
| 736 | + |
| 737 | + pydict.update( |
| 738 | + { |
| 739 | + f"{array}_{i}": getattr(self, array)[:, i] |
| 740 | + for array in FeatherPulsar.vector_columns |
| 741 | + for i in range(getattr(self, array).shape[1]) |
| 742 | + } |
| 743 | + ) |
| 744 | + |
| 745 | + pydict.update( |
| 746 | + { |
| 747 | + f"{array}_{i}_{j}": getattr(self, array)[:, i, j] |
| 748 | + for array in FeatherPulsar.tensor_columns |
| 749 | + for i in range(getattr(self, array).shape[1]) |
| 750 | + for j in range(getattr(self, array).shape[2]) |
| 751 | + } |
| 752 | + ) |
| 753 | + |
| 754 | + pydict.update({f"flags_{flag}": self.flags[flag] for flag in self.flags}) |
| 755 | + |
| 756 | + meta = {} |
| 757 | + for attr in Pulsar.metadata: |
| 758 | + if hasattr(self, attr): |
| 759 | + meta[attr] = Pulsar.to_list(getattr(self, attr)) |
| 760 | + else: |
| 761 | + print(f"Pulsar.save_feather: cannot find {attr} in Pulsar {self.name}.") |
| 762 | + |
| 763 | + # use attribute if present |
| 764 | + noisedict = getattr(self, "noisedict", None) if noisedict is None else noisedict |
| 765 | + if noisedict: |
| 766 | + # only keep noisedict entries that are for this pulsar (requires pulsar name to be first part of the key!) |
| 767 | + meta["noisedict"] = {par: val for par, val in noisedict.items() if par.startswith(self.name)} |
| 768 | + |
| 769 | + feather.write_feather(Table.from_pydict(pydict, metadata={"json": json.dumps(meta)}), filename) |
| 770 | + |
| 771 | + |
658 | 772 | def Pulsar(*args, **kwargs): |
| 773 | + featherfile = [x for x in args if isinstance(x, str) and x.endswith(".feather")] |
| 774 | + if featherfile: |
| 775 | + return FeatherPulsar.read_feather(featherfile[0]) |
| 776 | + featherfile = kwargs.get("filepath", None) |
| 777 | + if featherfile: |
| 778 | + return FeatherPulsar.read_feather(featherfile) |
| 779 | + |
659 | 780 | ephem = kwargs.get("ephem", None) |
660 | 781 | clk = kwargs.get("clk", None) |
661 | 782 | bipm_version = kwargs.get("bipm_version", None) |
|
0 commit comments