-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
COMPAT: reading generic PyTables Table format fails with sub-selection #26818
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
19dc304
e9c7c39
b7a082a
964cba1
04b8423
7d200a5
70e78c9
e7eb30a
5b881d8
9641111
d2a9882
aa83473
065054f
9cc85ce
cd69c0b
a9c6f15
8adf459
aed78ff
53dba1a
dfec26e
b9421af
9c0e96b
79bed6a
bca6ee6
60d37e0
1ce1a70
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1624,7 +1624,8 @@ def infer(self, handler): | |
new_self.read_metadata(handler) | ||
return new_self | ||
|
||
def convert(self, values, nan_rep, encoding, errors): | ||
def convert(self, values, nan_rep, encoding, errors, start=None, | ||
stop=None): | ||
""" set the values from this selection: take = take ownership """ | ||
|
||
# values is a recarray | ||
|
@@ -1813,10 +1814,29 @@ class GenericIndexCol(IndexCol): | |
def is_indexed(self): | ||
return False | ||
|
||
def convert(self, values, nan_rep, encoding, errors): | ||
""" set the values from this selection: take = take ownership """ | ||
def convert(self, values, nan_rep, encoding, errors, start=None, | ||
stop=None): | ||
""" set the values from this selection: take = take ownership | ||
|
||
Parameters | ||
---------- | ||
|
||
values : np.ndarray | ||
nan_rep : str | ||
encoding : str | ||
errors : str | ||
start : int, optional | ||
Table row number: the start of the sub-selection. | ||
stop : int, optional | ||
Table row number: the end of the sub-selection. Values larger than | ||
the underlying table's row count are normalized to that. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. At least here I understand the meaning of `start` and `stop`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. right, can you add all the parameters. See if you can put something down for them, some doc-string is better than none. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 Adding statements for `values`, `nan_rep`, `encoding`, and `errors`. The biggest challenge for me still is that I do not feel like I understand the general purpose of the `convert()` method and its arguments. |
||
""" | ||
|
||
start = start if start is not None else 0 | ||
stop = (min(stop, self.table.nrows) | ||
if stop is not None else self.table.nrows) | ||
self.values = Int64Index(np.arange(stop - start)) | ||
|
||
self.values = Int64Index(np.arange(self.table.nrows)) | ||
return self | ||
|
||
def get_attr(self): | ||
|
@@ -2159,7 +2179,8 @@ def validate_attr(self, append): | |
raise ValueError("appended items dtype do not match existing " | ||
"items dtype in table!") | ||
|
||
def convert(self, values, nan_rep, encoding, errors): | ||
def convert(self, values, nan_rep, encoding, errors, start=None, | ||
stop=None): | ||
"""set the data from this selection (and convert to the correct dtype | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. same |
||
if we can) | ||
""" | ||
|
@@ -3431,8 +3452,11 @@ def read_axes(self, where, **kwargs): | |
# convert the data | ||
for a in self.axes: | ||
a.set_info(self.info) | ||
# `kwargs` may contain `start` and `stop` arguments if passed to | ||
# `store.select()`. If set they determine the index size. | ||
a.convert(values, nan_rep=self.nan_rep, encoding=self.encoding, | ||
errors=self.errors) | ||
errors=self.errors, start=kwargs.get('start'), | ||
stop=kwargs.get('stop')) | ||
|
||
return True | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import pytest | ||
|
||
import pandas as pd | ||
from pandas.tests.io.test_pytables import ensure_clean_path | ||
from pandas.util.testing import assert_frame_equal | ||
|
||
tables = pytest.importorskip('tables') | ||
|
||
|
||
@pytest.fixture | ||
def pytables_hdf5_file(): | ||
"""Use PyTables to create a simple HDF5 file.""" | ||
|
||
table_schema = { | ||
'c0': tables.Time64Col(pos=0), | ||
'c1': tables.StringCol(5, pos=1), | ||
'c2': tables.Int64Col(pos=2), | ||
} | ||
|
||
t0 = 1561105000.0 | ||
|
||
testsamples = [ | ||
{'c0': t0, 'c1': 'aaaaa', 'c2': 1}, | ||
{'c0': t0 + 1, 'c1': 'bbbbb', 'c2': 2}, | ||
{'c0': t0 + 2, 'c1': 'ccccc', 'c2': 10**5}, | ||
{'c0': t0 + 3, 'c1': 'ddddd', 'c2': 4294967295}, | ||
] | ||
|
||
objname = 'pandas_test_timeseries' | ||
|
||
with ensure_clean_path('written_with_pytables.h5') as path: | ||
# The `ensure_clean_path` context mgr removes the temp file upon exit. | ||
with tables.open_file(path, mode='w') as f: | ||
t = f.create_table('/', name=objname, description=table_schema) | ||
for sample in testsamples: | ||
for key, value in sample.items(): | ||
t.row[key] = value | ||
t.row.append() | ||
|
||
yield path, objname, pd.DataFrame(testsamples) | ||
|
||
|
||
class TestReadPyTablesHDF5: | ||
""" | ||
A group of tests which covers reading HDF5 files written by plain PyTables | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you add the issue number here in the comment |
||
(not written by pandas). | ||
|
||
Was introduced for regression-testing issue 11188. | ||
""" | ||
|
||
def test_read_complete(self, pytables_hdf5_file): | ||
path, objname, df = pytables_hdf5_file | ||
result = pd.read_hdf(path, key=objname) | ||
expected = df | ||
assert_frame_equal(result, expected) | ||
|
||
def test_read_with_start(self, pytables_hdf5_file): | ||
path, objname, df = pytables_hdf5_file | ||
# This is a regression test for pandas-dev/pandas/issues/11188 | ||
result = pd.read_hdf(path, key=objname, start=1) | ||
expected = df[1:].reset_index(drop=True) | ||
assert_frame_equal(result, expected) | ||
|
||
def test_read_with_stop(self, pytables_hdf5_file): | ||
path, objname, df = pytables_hdf5_file | ||
# This is a regression test for pandas-dev/pandas/issues/11188 | ||
result = pd.read_hdf(path, key=objname, stop=1) | ||
expected = df[:1].reset_index(drop=True) | ||
assert_frame_equal(result, expected) | ||
|
||
def test_read_with_startstop(self, pytables_hdf5_file): | ||
path, objname, df = pytables_hdf5_file | ||
# This is a regression test for pandas-dev/pandas/issues/11188 | ||
result = pd.read_hdf(path, key=objname, start=1, stop=2) | ||
expected = df[1:2].reset_index(drop=True) | ||
assert_frame_equal(result, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you add a doc-string here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
types are a bonus
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would just add `start=None` and `stop=None` instead of kwargs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done, commit upcoming
I would love to add one! I have difficulties understanding the general purpose of the `convert()` method though, and documenting `values`, `nan_rep`, `encoding`, and `errors` is pretty challenging given my lack of understanding. As far as I see, the `start` and `stop` arguments are not really meaningful arguments here (in `IndexCol.convert()`), so I am also a bit helpless with documenting them! :-) How would you document `start` and `stop` here?