diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index ae604b1141204..46d8ef04dd8e5 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -51,6 +51,19 @@ tables = pytest.importorskip("tables") +@pytest.fixture +def setup_path(): + """Fixture for setup path""" + return "tmp.__{}__.h5".format(tm.rands(10)) + + +@pytest.fixture(scope="class", autouse=True) +def setup_mode(): + tm.reset_testing_mode() + yield + tm.set_testing_mode() + + # TODO: # remove when gh-24839 is fixed; this affects numpy 1.16 # and pytables 3.4.4 @@ -148,36 +161,16 @@ def _maybe_remove(store, key): pass -class Base: - @classmethod - def setup_class(cls): - - # Pytables 3.0.0 deprecates lots of things - tm.reset_testing_mode() - - @classmethod - def teardown_class(cls): - - # Pytables 3.0.0 deprecates lots of things - tm.set_testing_mode() - - def setup_method(self, method): - self.path = "tmp.__{}__.h5".format(tm.rands(10)) - - def teardown_method(self, method): - pass - - @pytest.mark.single -class TestHDFStore(Base): - def test_format_kwarg_in_constructor(self): +class TestHDFStore: + def test_format_kwarg_in_constructor(self, setup_path): # GH 13291 - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: with pytest.raises(ValueError): HDFStore(path, format="table") - def test_context(self): - path = create_tempfile(self.path) + def test_context(self, setup_path): + path = create_tempfile(setup_path) try: with HDFStore(path) as tbl: raise ValueError("blah") @@ -196,8 +189,8 @@ def test_context(self): finally: safe_remove(path) - def test_conv_read_write(self): - path = create_tempfile(self.path) + def test_conv_read_write(self, setup_path): + path = create_tempfile(setup_path) try: def roundtrip(key, obj, **kwargs): @@ -222,24 +215,24 @@ def roundtrip(key, obj, **kwargs): finally: safe_remove(path) - def test_long_strings(self): + def test_long_strings(self, setup_path): # GH6166 df = DataFrame( {"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10) ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df, data_columns=["a"]) result = store.select("df") assert_frame_equal(df, result) - def test_api(self): + def test_api(self, setup_path): # GH4584 # API issue when to_hdf doesn't accept append AND format args - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() df.iloc[:10].to_hdf(path, "df", append=True, format="table") @@ -251,7 +244,7 @@ def test_api(self): df.iloc[10:].to_hdf(path, "df", append=True, format="table") assert_frame_equal(read_hdf(path, "df"), df) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() df.iloc[:10].to_hdf(path, "df", append=True) @@ -263,7 +256,7 @@ def test_api(self): df.iloc[10:].to_hdf(path, "df", append=True) assert_frame_equal(read_hdf(path, "df"), df) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() df.to_hdf(path, "df", append=False, format="fixed") @@ -278,7 +271,7 @@ def test_api(self): df.to_hdf(path, "df") assert_frame_equal(read_hdf(path, "df"), df) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: path = store._path df = tm.makeDataFrame() @@ -305,7 +298,7 @@ def test_api(self): store.append("df", df.iloc[10:], append=True, format=None) assert_frame_equal(store.select("df"), df) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # Invalid. df = tm.makeDataFrame() @@ -326,10 +319,10 @@ def test_api(self): with pytest.raises(FileNotFoundError): read_hdf(path, "df") - def test_api_default_format(self): + def test_api_default_format(self, setup_path): # default_format option - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeDataFrame() pd.set_option("io.hdf.default_format", "fixed") @@ -349,7 +342,7 @@ def test_api_default_format(self): pd.set_option("io.hdf.default_format", None) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() @@ -370,9 +363,9 @@ def test_api_default_format(self): pd.set_option("io.hdf.default_format", None) - def test_keys(self): + def test_keys(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeSeries() store["b"] = tm.makeStringSeries() store["c"] = tm.makeDataFrame() @@ -382,12 +375,12 @@ def test_keys(self): assert set(store.keys()) == expected assert set(store) == expected - def test_keys_ignore_hdf_softlink(self): + def test_keys_ignore_hdf_softlink(self, setup_path): # GH 20523 # Puts a softlink into HDF file and rereads - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame(dict(A=range(5), B=range(5))) store.put("df", df) @@ -399,15 +392,15 @@ def test_keys_ignore_hdf_softlink(self): # Should ignore the softlink assert store.keys() == ["/df"] - def test_iter_empty(self): + def test_iter_empty(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # GH 12221 assert list(store) == [] - def test_repr(self): + def test_repr(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: repr(store) store.info() store["a"] = tm.makeTimeSeries() @@ -441,7 +434,7 @@ def test_repr(self): store.info() # storers - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeDataFrame() store.append("df", df) @@ -451,9 +444,9 @@ def test_repr(self): str(s) @ignore_natural_naming_warning - def test_contains(self): + def test_contains(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeSeries() store["b"] = tm.makeDataFrame() store["foo/bar"] = tm.makeDataFrame() @@ -470,9 +463,9 @@ def test_contains(self): store["node())"] = tm.makeDataFrame() assert "node())" in store - def test_versioning(self): + def test_versioning(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeSeries() store["b"] = tm.makeDataFrame() df = tm.makeTimeDataFrame() @@ -493,13 +486,13 @@ def test_versioning(self): with pytest.raises(Exception): store.select("df2") - def test_mode(self): + def test_mode(self, setup_path): df = tm.makeTimeDataFrame() def check(mode): - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # constructor if mode in ["r", "r+"]: @@ -511,7 +504,7 @@ def check(mode): assert store._handle.mode == mode store.close() - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # context if mode in ["r", "r+"]: @@ -522,7 +515,7 @@ def check(mode): with HDFStore(path, mode=mode) as store: assert store._handle.mode == mode - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # conv write if mode in ["r", "r+"]: @@ -543,7 +536,7 @@ def check(mode): def check_default_mode(): # read_hdf uses default mode - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", mode="w") result = read_hdf(path, "df") assert_frame_equal(result, df) @@ -554,9 +547,9 @@ def check_default_mode(): check("w") check_default_mode() - def test_reopen_handle(self): + def test_reopen_handle(self, setup_path): - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: store = HDFStore(path, mode="a") store["a"] = tm.makeTimeSeries() @@ -602,9 +595,9 @@ def test_reopen_handle(self): store.close() assert not store.is_open - def test_open_args(self): + def test_open_args(self, setup_path): - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() @@ -623,16 +616,16 @@ def test_open_args(self): # the file should not have actually been written assert not os.path.exists(path) - def test_flush(self): + def test_flush(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeSeries() store.flush() store.flush(fsync=True) - def test_get(self): + def test_get(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeSeries() left = store.get("a") right = store["a"] @@ -666,7 +659,7 @@ def test_get(self): ), ], ) - def test_walk(self, where, expected): + def test_walk(self, where, expected, setup_path): # GH10143 objs = { "df1": pd.DataFrame([1, 2, 3]), @@ -705,9 +698,9 @@ def test_walk(self, where, expected): else: tm.assert_series_equal(obj, objs[leaf]) - def test_getattr(self): + def test_getattr(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: s = tm.makeTimeSeries() store["a"] = s @@ -732,9 +725,9 @@ def test_getattr(self): for x in ["mode", "path", "handle", "complib"]: getattr(store, "_{x}".format(x=x)) - def test_put(self): + def test_put(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: ts = tm.makeTimeSeries() df = tm.makeTimeDataFrame() @@ -763,9 +756,9 @@ def test_put(self): store.put("c", df[:10], format="table", append=False) tm.assert_frame_equal(df[:10], store["c"]) - def test_put_string_index(self): + def test_put_string_index(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: index = Index( ["I am a very long string index: {i}".format(i=i) for i in range(20)] @@ -792,9 +785,9 @@ def test_put_string_index(self): store["b"] = df tm.assert_frame_equal(store["b"], df) - def test_put_compression(self): + def test_put_compression(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeTimeDataFrame() store.put("c", df, format="table", complib="zlib") @@ -805,10 +798,10 @@ def test_put_compression(self): store.put("b", df, format="fixed", complib="zlib") @td.skip_if_windows_python_3 - def test_put_compression_blosc(self): + def test_put_compression_blosc(self, setup_path): df = tm.makeTimeDataFrame() - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # can't compress if format='fixed' with pytest.raises(ValueError): @@ -817,13 +810,13 @@ def test_put_compression_blosc(self): store.put("c", df, format="table", complib="blosc") tm.assert_frame_equal(store["c"], df) - def test_complibs_default_settings(self): + def test_complibs_default_settings(self, setup_path): # GH15943 df = tm.makeDataFrame() # Set complevel and check if complib is automatically set to # default value - with ensure_clean_path(self.path) as tmpfile: + with ensure_clean_path(setup_path) as tmpfile: df.to_hdf(tmpfile, "df", complevel=9) result = pd.read_hdf(tmpfile, "df") tm.assert_frame_equal(result, df) @@ -834,7 +827,7 @@ def test_complibs_default_settings(self): assert node.filters.complib == "zlib" # Set complib and check to see if compression is disabled - with ensure_clean_path(self.path) as tmpfile: + with ensure_clean_path(setup_path) as tmpfile: df.to_hdf(tmpfile, "df", complib="zlib") result = pd.read_hdf(tmpfile, "df") tm.assert_frame_equal(result, df) @@ -845,7 +838,7 @@ def test_complibs_default_settings(self): assert node.filters.complib is None # Check if not setting complib or complevel results in no compression - with ensure_clean_path(self.path) as tmpfile: + with ensure_clean_path(setup_path) as tmpfile: df.to_hdf(tmpfile, "df") result = pd.read_hdf(tmpfile, "df") tm.assert_frame_equal(result, df) @@ -856,7 +849,7 @@ def test_complibs_default_settings(self): assert node.filters.complib is None # Check if file-defaults can be overridden on a per table basis - with ensure_clean_path(self.path) as tmpfile: + with ensure_clean_path(setup_path) as tmpfile: store = pd.HDFStore(tmpfile) store.append("dfc", df, complevel=9, complib="blosc") store.append("df", df) @@ -870,7 +863,7 @@ def test_complibs_default_settings(self): assert node.filters.complevel == 9 assert node.filters.complib == "blosc" - def test_complibs(self): + def test_complibs(self, setup_path): # GH14478 df = tm.makeDataFrame() @@ -887,7 +880,7 @@ def test_complibs(self): all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels] for (lib, lvl) in all_tests: - with ensure_clean_path(self.path) as tmpfile: + with ensure_clean_path(setup_path) as tmpfile: gname = "foo" # Write and read file to see if data is consistent @@ -906,13 +899,13 @@ def test_complibs(self): assert node.filters.complib == lib h5table.close() - def test_put_integer(self): + def test_put_integer(self, setup_path): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) - self._check_roundtrip(df, tm.assert_frame_equal) + self._check_roundtrip(df, tm.assert_frame_equal, setup_path) @xfail_non_writeable - def test_put_mixed_type(self): + def test_put_mixed_type(self, setup_path): df = tm.makeTimeDataFrame() df["obj1"] = "foo" df["obj2"] = "bar" @@ -928,7 +921,7 @@ def test_put_mixed_type(self): df.loc[3:6, ["obj1"]] = np.nan df = df._consolidate()._convert(datetime=True) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: _maybe_remove(store, "df") # PerformanceWarning @@ -942,9 +935,9 @@ def test_put_mixed_type(self): @pytest.mark.filterwarnings( "ignore:object name:tables.exceptions.NaturalNameWarning" ) - def test_append(self): + def test_append(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # this is allowed by almost always don't want to do it # tables.NaturalNameWarning): @@ -1010,9 +1003,9 @@ def test_append(self): store.append("uints", uint_data, data_columns=["u08", "u16", "u32"]) tm.assert_frame_equal(store["uints"], uint_data) - def test_append_series(self): + def test_append_series(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # basic ss = tm.makeStringSeries() @@ -1056,11 +1049,11 @@ def test_append_series(self): store.append("mi", s) tm.assert_series_equal(store["mi"], s) - def test_store_index_types(self): + def test_store_index_types(self, setup_path): # GH5386 # test storing various index types - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: def check(format, index): df = DataFrame(np.random.randn(10, 2), columns=list("AB")) @@ -1093,9 +1086,9 @@ def check(format, index): @pytest.mark.skipif( not is_platform_little_endian(), reason="reason platform is not little endian" ) - def test_encoding(self): + def test_encoding(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame(dict(A="foo", B="bar"), index=range(5)) df.loc[2, "A"] = np.nan df.loc[3, "B"] = np.nan @@ -1122,7 +1115,7 @@ def test_encoding(self): ], ) @pytest.mark.parametrize("dtype", ["category", object]) - def test_latin_encoding(self, dtype, val): + def test_latin_encoding(self, setup_path, dtype, val): enc = "latin-1" nan_rep = "" key = "data" @@ -1130,7 +1123,7 @@ def test_latin_encoding(self, dtype, val): val = [x.decode(enc) if isinstance(x, bytes) else x for x in val] ser = pd.Series(val, dtype=dtype) - with ensure_clean_path(self.path) as store: + with ensure_clean_path(setup_path) as store: ser.to_hdf(store, key, format="table", encoding=enc, nan_rep=nan_rep) retr = read_hdf(store, key) @@ -1147,9 +1140,9 @@ def test_latin_encoding(self, dtype, val): # for x in examples: # roundtrip(s, nan_rep=b'\xf8\xfc') - def test_append_some_nans(self): + def test_append_some_nans(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame( { "A": Series(np.random.randn(20)).astype("int32"), @@ -1193,9 +1186,9 @@ def test_append_some_nans(self): store.append("df3", df3[10:]) tm.assert_frame_equal(store["df3"], df3) - def test_append_all_nans(self): + def test_append_all_nans(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame( {"A1": np.random.randn(20), "A2": np.random.randn(20)}, @@ -1283,14 +1276,14 @@ def test_append_all_nans(self): {"col1": [0, np.nan, 2], "col2": [1, np.nan, np.nan]} ) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df_with_missing.to_hdf(path, "df_with_missing", format="table") reloaded = read_hdf(path, "df_with_missing") tm.assert_frame_equal(df_with_missing, reloaded) - def test_read_missing_key_close_store(self): + def test_read_missing_key_close_store(self, setup_path): # GH 25766 - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = pd.DataFrame({"a": range(2), "b": range(2)}) df.to_hdf(path, "k1") @@ -1301,9 +1294,9 @@ def test_read_missing_key_close_store(self): # read with KeyError before another write df.to_hdf(path, "k2") - def test_append_frame_column_oriented(self): + def test_append_frame_column_oriented(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # column oriented df = tm.makeTimeDataFrame() @@ -1325,10 +1318,10 @@ def test_append_frame_column_oriented(self): with pytest.raises(TypeError): store.select("df1", "columns=A and index>df.index[4]") - def test_append_with_different_block_ordering(self): + def test_append_with_different_block_ordering(self, setup_path): # GH 4096; using same frames, but different block orderings - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: for i in range(10): @@ -1351,7 +1344,7 @@ def test_append_with_different_block_ordering(self): # test a different ordering but with more fields (like invalid # combinate) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame(np.random.randn(10, 2), columns=list("AB"), dtype="float64") df["int64"] = Series([1] * len(df), dtype="int64") @@ -1368,9 +1361,9 @@ def test_append_with_different_block_ordering(self): with pytest.raises(ValueError): store.append("df", df) - def test_append_with_strings(self): + def test_append_with_strings(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: with catch_warnings(record=True): def check_col(key, name, size): @@ -1444,7 +1437,7 @@ def check_col(key, name, size): result = store.select("df") tm.assert_frame_equal(result, df) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: def check_col(key, name, size): assert getattr( @@ -1484,9 +1477,9 @@ def check_col(key, name, size): with pytest.raises(ValueError): store.append("df", df, min_itemsize={"foo": 20, "foobar": 20}) - def test_append_with_empty_string(self): + def test_append_with_empty_string(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # with all empty strings (GH 12242) df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]}) @@ -1494,9 +1487,9 @@ def test_append_with_empty_string(self): store.append("df", df[-1:], min_itemsize={"x": 1}) tm.assert_frame_equal(store.select("df"), df) - def test_to_hdf_with_min_itemsize(self): + def test_to_hdf_with_min_itemsize(self, setup_path): - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # min_itemsize in index with to_hdf (GH 10381) df = tm.makeMixedDataFrame().set_index("C") @@ -1516,20 +1509,20 @@ def test_to_hdf_with_min_itemsize(self): @pytest.mark.parametrize( "format", [pytest.param("fixed", marks=xfail_non_writeable), "table"] ) - def test_to_hdf_errors(self, format): + def test_to_hdf_errors(self, format, setup_path): data = ["\ud800foo"] ser = pd.Series(data, index=pd.Index(data)) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # GH 20835 ser.to_hdf(path, "table", format=format, errors="surrogatepass") result = pd.read_hdf(path, "table", errors="surrogatepass") tm.assert_series_equal(result, ser) - def test_append_with_data_columns(self): + def test_append_with_data_columns(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeTimeDataFrame() df.iloc[0, df.columns.get_loc("B")] = 1.0 _maybe_remove(store, "df") @@ -1570,7 +1563,7 @@ def check_col(key, name, size): == size ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: _maybe_remove(store, "df") store.append( "df", df_new, data_columns=["string"], min_itemsize={"string": 30} @@ -1585,7 +1578,7 @@ def check_col(key, name, size): ) check_col("df", "string", 30) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df_new["string2"] = "foobarbah" df_new["string_block1"] = "foobarbah1" df_new["string_block2"] = "foobarbah2" @@ -1600,7 +1593,7 @@ def check_col(key, name, size): check_col("df", "string2", 40) check_col("df", "values_block_1", 50) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # multiple data columns df_new = df.copy() df_new.iloc[0, df_new.columns.get_loc("A")] = 1.0 @@ -1633,7 +1626,7 @@ def check_col(key, name, size): expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")] tm.assert_frame_equal(result, expected, check_index_type=False) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # doc example df_dc = df.copy() df_dc["string"] = "foo" @@ -1657,7 +1650,7 @@ def check_col(key, name, size): expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")] tm.assert_frame_equal(result, expected, check_index_type=False) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # doc example part 2 np.random.seed(1234) index = date_range("1/1/2000", periods=8) @@ -1681,9 +1674,9 @@ def check_col(key, name, size): expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")] tm.assert_frame_equal(result, expected) - def test_create_table_index(self): + def test_create_table_index(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: with catch_warnings(record=True): @@ -1713,7 +1706,7 @@ def col(t, column): with pytest.raises(TypeError): store.create_table_index("f2") - def test_append_hierarchical(self): + def test_append_hierarchical(self, setup_path): index = MultiIndex( levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], @@ -1721,7 +1714,7 @@ def test_append_hierarchical(self): ) df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("mi", df) result = store.select("mi") tm.assert_frame_equal(result, df) @@ -1737,7 +1730,7 @@ def test_append_hierarchical(self): expected = df.reindex(columns=["A", "B"]) tm.assert_frame_equal(result, expected) - def test_column_multiindex(self): + def test_column_multiindex(self, setup_path): # GH 4710 # recreate multi-indexes properly @@ -1749,7 +1742,7 @@ def test_column_multiindex(self): if isinstance(expected.index, RangeIndex): expected.index = Int64Index(expected.index) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df", df) tm.assert_frame_equal( @@ -1767,7 +1760,7 @@ def test_column_multiindex(self): store.put("df3", df, format="table", data_columns=True) # appending multi-column on existing table (see GH 6167) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df2", df) store.append("df2", df) @@ -1781,18 +1774,18 @@ def test_column_multiindex(self): if isinstance(expected.index, RangeIndex): expected.index = Int64Index(expected.index) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df1", df, format="table") tm.assert_frame_equal( store["df1"], expected, check_index_type=True, check_column_type=True ) - def test_store_multiindex(self): + def test_store_multiindex(self, setup_path): # validate multi-index names # GH 5527 - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: def make_index(names=None): return MultiIndex.from_tuples( @@ -1858,7 +1851,7 @@ def make_index(names=None): store.append("df", df) tm.assert_frame_equal(store.select("df"), df) - def test_select_columns_in_where(self): + def test_select_columns_in_where(self, setup_path): # GH 6169 # recreate multi-indexes when columns is passed @@ -1872,7 +1865,7 @@ def test_select_columns_in_where(self): # With a DataFrame df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df", df, format="table") expected = df[["A"]] @@ -1882,29 +1875,29 @@ def test_select_columns_in_where(self): # With a Series s = Series(np.random.randn(10), index=index, name="A") - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("s", s, format="table") tm.assert_series_equal(store.select("s", where="columns=['A']"), s) - def test_mi_data_columns(self): + def test_mi_data_columns(self, setup_path): # GH 14435 idx = pd.MultiIndex.from_arrays( [date_range("2000-01-01", periods=5), range(5)], names=["date", "id"] ) df = pd.DataFrame({"a": [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df, data_columns=True) actual = store.select("df", where="id == 1") expected = df.iloc[[1], :] tm.assert_frame_equal(actual, expected) - def test_pass_spec_to_storer(self): + def test_pass_spec_to_storer(self, setup_path): df = tm.makeDataFrame() - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df", df) with pytest.raises(TypeError): store.select("df", columns=["A"]) @@ -1912,9 +1905,9 @@ def test_pass_spec_to_storer(self): store.select("df", where=[("columns=A")]) @xfail_non_writeable - def test_append_misc(self): + def test_append_misc(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeDataFrame() store.append("df", df, chunksize=1) result = store.select("df") @@ -1927,7 +1920,7 @@ def test_append_misc(self): # more chunksize in append tests def check(obj, comparator): for c in [10, 200, 1000]: - with ensure_clean_store(self.path, mode="w") as store: + with ensure_clean_store(setup_path, mode="w") as store: store.append("obj", obj, chunksize=c) result = store.select("obj") comparator(result, obj) @@ -1942,7 +1935,7 @@ def check(obj, comparator): check(df, tm.assert_frame_equal) # empty frame, GH4273 - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # 0 len df_empty = DataFrame(columns=list("ABC")) @@ -1962,9 +1955,9 @@ def check(obj, comparator): store.put("df2", df) assert_frame_equal(store.select("df2"), df) - def test_append_raise(self): + def test_append_raise(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # test append with invalid input to get good error messages @@ -2007,18 +2000,18 @@ def test_append_raise(self): with pytest.raises(ValueError): store.append("df", df) - def test_table_index_incompatible_dtypes(self): + def test_table_index_incompatible_dtypes(self, setup_path): df1 = DataFrame({"a": [1, 2, 3]}) df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3)) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("frame", df1, format="table") with pytest.raises(TypeError): store.put("frame", df2, format="table", append=True) - def test_table_values_dtypes_roundtrip(self): + def test_table_values_dtypes_roundtrip(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8") store.append("df_f8", df1) assert_series_equal(df1.dtypes, store["df_f8"].dtypes) @@ -2072,7 +2065,7 @@ def test_table_values_dtypes_roundtrip(self): expected = expected.sort_index() tm.assert_series_equal(result, expected) - def test_table_mixed_dtypes(self): + def test_table_mixed_dtypes(self, setup_path): # frame df = tm.makeDataFrame() @@ -2090,13 +2083,13 @@ def test_table_mixed_dtypes(self): df.loc[3:6, ["obj1"]] = np.nan df = df._consolidate()._convert(datetime=True) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df1_mixed", df) tm.assert_frame_equal(store.select("df1_mixed"), df) - def test_unimplemented_dtypes_table_columns(self): + def test_unimplemented_dtypes_table_columns(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: dtypes = [("date", datetime.date(2001, 1, 2))] @@ -2114,7 +2107,7 @@ def test_unimplemented_dtypes_table_columns(self): df["datetime1"] = datetime.date(2001, 1, 2) df = df._consolidate()._convert(datetime=True) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # this fails because we have a date in the object block...... with pytest.raises(TypeError): store.append("df_unimplemented", df) @@ -2127,7 +2120,7 @@ def test_unimplemented_dtypes_table_columns(self): "exactly equal to 1.15.0: gh-22098" ), ) - def test_calendar_roundtrip_issue(self): + def test_calendar_roundtrip_issue(self, setup_path): # 8591 # doc example from tseries holiday section @@ -2145,7 +2138,7 @@ def test_calendar_roundtrip_issue(self): s = Series(dts.weekday, dts).map(Series("Mon Tue Wed Thu Fri Sat Sun".split())) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("fixed", s) result = store.select("fixed") @@ -2155,18 +2148,18 @@ def test_calendar_roundtrip_issue(self): result = store.select("table") assert_series_equal(result, s) - def test_roundtrip_tz_aware_index(self): + def test_roundtrip_tz_aware_index(self, setup_path): # GH 17618 time = pd.Timestamp("2000-01-01 01:00:00", tz="US/Eastern") df = pd.DataFrame(data=[0], index=[time]) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("frame", df, format="fixed") recons = store["frame"] tm.assert_frame_equal(recons, df) assert recons.index[0].value == 946706400000000000 - def test_append_with_timedelta(self): + def test_append_with_timedelta(self, setup_path): # GH 3577 # append timedelta @@ -2182,7 +2175,7 @@ def test_append_with_timedelta(self): df["C"] = df["A"] - df["B"] df.loc[3:5, "C"] = np.nan - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # table _maybe_remove(store, "df") @@ -2215,9 +2208,9 @@ def test_append_with_timedelta(self): result = store.select("df2") assert_frame_equal(result, df) - def test_remove(self): + def test_remove(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: ts = tm.makeTimeSeries() df = tm.makeDataFrame() @@ -2255,9 +2248,9 @@ def test_remove(self): del store["b"] assert len(store) == 0 - def test_invalid_terms(self): + def test_invalid_terms(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: with catch_warnings(record=True): @@ -2279,7 +2272,7 @@ def test_invalid_terms(self): store.select("df", "index>") # from the docs - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: dfq = DataFrame( np.random.randn(10, 4), columns=list("ABCD"), @@ -2294,7 +2287,7 @@ def test_invalid_terms(self): read_hdf(path, "dfq", where="A>0 or C>0") # catch the invalid reference - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: dfq = DataFrame( np.random.randn(10, 4), columns=list("ABCD"), @@ -2305,9 +2298,9 @@ def test_invalid_terms(self): with pytest.raises(ValueError): read_hdf(path, "dfq", where="A>0 or C>0") - def test_same_name_scoping(self): + def test_same_name_scoping(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: import pandas as pd @@ -2331,29 +2324,31 @@ def test_same_name_scoping(self): result = store.select("df", "index>datetime(2013,1,5)") assert_frame_equal(result, expected) - def test_series(self): + def test_series(self, setup_path): s = tm.makeStringSeries() - self._check_roundtrip(s, tm.assert_series_equal) + self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) ts = tm.makeTimeSeries() - self._check_roundtrip(ts, tm.assert_series_equal) + self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path) ts2 = Series(ts.index, Index(ts.index, dtype=object)) - self._check_roundtrip(ts2, tm.assert_series_equal) + self._check_roundtrip(ts2, tm.assert_series_equal, path=setup_path) ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)) - self._check_roundtrip(ts3, tm.assert_series_equal, check_index_type=False) + self._check_roundtrip( + ts3, tm.assert_series_equal, path=setup_path, check_index_type=False + ) - def test_float_index(self): + def test_float_index(self, setup_path): # GH #454 index = np.random.randn(10) s = Series(np.random.randn(10), index=index) - self._check_roundtrip(s, tm.assert_series_equal) + self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) @xfail_non_writeable - def test_tuple_index(self): + def test_tuple_index(self, setup_path): # GH #492 col = np.arange(10) @@ -2363,11 +2358,11 @@ def test_tuple_index(self): with catch_warnings(record=True): simplefilter("ignore", pd.errors.PerformanceWarning) - self._check_roundtrip(DF, tm.assert_frame_equal) + self._check_roundtrip(DF, tm.assert_frame_equal, path=setup_path) @xfail_non_writeable @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") - def test_index_types(self): + def test_index_types(self, setup_path): with catch_warnings(record=True): values = np.random.randn(2) @@ -2378,54 +2373,54 @@ def test_index_types(self): with catch_warnings(record=True): ser = Series(values, [0, "y"]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) with catch_warnings(record=True): ser = Series(values, [datetime.datetime.today(), 0]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) with catch_warnings(record=True): ser = Series(values, ["y", 0]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) with catch_warnings(record=True): ser = Series(values, [datetime.date.today(), "a"]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) with catch_warnings(record=True): ser = Series(values, [0, "y"]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, [datetime.datetime.today(), 0]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, ["y", 0]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, [datetime.date.today(), "a"]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, [1.23, "b"]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, [1, 1.53]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, [1, 5]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series( values, [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 1, 2)] ) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) - def test_timeseries_preepoch(self): + def test_timeseries_preepoch(self, setup_path): dr = bdate_range("1/1/1940", "1/1/1960") ts = Series(np.random.randn(len(dr)), index=dr) try: - self._check_roundtrip(ts, tm.assert_series_equal) + self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path) except OverflowError: pytest.skip("known failer on some windows platforms") @@ -2433,7 +2428,7 @@ def test_timeseries_preepoch(self): @pytest.mark.parametrize( "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)] ) - def test_frame(self, compression): + def test_frame(self, compression, setup_path): df = tm.makeDataFrame() @@ -2441,13 +2436,19 @@ def test_frame(self, compression): df.values[0, 0] = np.nan df.values[5, 3] = np.nan - self._check_roundtrip_table(df, tm.assert_frame_equal, compression=compression) - self._check_roundtrip(df, tm.assert_frame_equal, compression=compression) + self._check_roundtrip_table( + df, tm.assert_frame_equal, path=setup_path, compression=compression + ) + self._check_roundtrip( + df, tm.assert_frame_equal, path=setup_path, compression=compression + ) tdf = tm.makeTimeDataFrame() - self._check_roundtrip(tdf, tm.assert_frame_equal, compression=compression) + self._check_roundtrip( + tdf, tm.assert_frame_equal, path=setup_path, compression=compression + ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # not consolidated df["foo"] = np.random.randn(len(df)) store["df"] = df @@ -2455,38 +2456,38 @@ def test_frame(self, compression): assert recons._data.is_consolidated() # empty - self._check_roundtrip(df[:0], tm.assert_frame_equal) + self._check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path) @xfail_non_writeable - def test_empty_series_frame(self): + def test_empty_series_frame(self, setup_path): s0 = Series() s1 = Series(name="myseries") df0 = DataFrame() df1 = DataFrame(index=["a", "b", "c"]) df2 = DataFrame(columns=["d", "e", "f"]) - self._check_roundtrip(s0, tm.assert_series_equal) - self._check_roundtrip(s1, tm.assert_series_equal) - self._check_roundtrip(df0, tm.assert_frame_equal) - self._check_roundtrip(df1, tm.assert_frame_equal) - self._check_roundtrip(df2, tm.assert_frame_equal) + self._check_roundtrip(s0, tm.assert_series_equal, path=setup_path) + self._check_roundtrip(s1, tm.assert_series_equal, path=setup_path) + self._check_roundtrip(df0, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path) @xfail_non_writeable @pytest.mark.parametrize( "dtype", [np.int64, np.float64, np.object, "m8[ns]", "M8[ns]"] ) - def test_empty_series(self, dtype): + def test_empty_series(self, dtype, setup_path): s = Series(dtype=dtype) - self._check_roundtrip(s, tm.assert_series_equal) + self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) - def test_can_serialize_dates(self): + def test_can_serialize_dates(self, setup_path): rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")] frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - self._check_roundtrip(frame, tm.assert_frame_equal) + self._check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) - def test_store_hierarchical(self): + def test_store_hierarchical(self, setup_path): index = MultiIndex( levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], @@ -2494,39 +2495,39 @@ def test_store_hierarchical(self): ) frame = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) - self._check_roundtrip(frame, tm.assert_frame_equal) - self._check_roundtrip(frame.T, tm.assert_frame_equal) - self._check_roundtrip(frame["A"], tm.assert_series_equal) + self._check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path) # check that the names are stored - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["frame"] = frame recons = store["frame"] tm.assert_frame_equal(recons, frame) - def test_store_index_name(self): + def test_store_index_name(self, setup_path): df = tm.makeDataFrame() df.index.name = "foo" - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["frame"] = df recons = store["frame"] tm.assert_frame_equal(recons, df) - def test_store_index_name_with_tz(self): + def test_store_index_name_with_tz(self, setup_path): # GH 13884 df = pd.DataFrame({"A": [1, 2]}) df.index = pd.DatetimeIndex([1234567890123456787, 1234567890123456788]) df.index = df.index.tz_localize("UTC") df.index.name = "foo" - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("frame", df, format="table") recons = store["frame"] tm.assert_frame_equal(recons, df) @pytest.mark.parametrize("table_format", ["table", "fixed"]) - def test_store_index_name_numpy_str(self, table_format): + def test_store_index_name_numpy_str(self, table_format, setup_path): # GH #13492 idx = pd.Index( pd.to_datetime([datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]), @@ -2539,7 +2540,7 @@ def test_store_index_name_numpy_str(self, table_format): df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1) # This used to fail, returning numpy strings instead of python strings. - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", format=table_format) df2 = read_hdf(path, "df") @@ -2548,11 +2549,11 @@ def test_store_index_name_numpy_str(self, table_format): assert type(df2.index.name) == str assert type(df2.columns.name) == str - def test_store_series_name(self): + def test_store_series_name(self, setup_path): df = tm.makeDataFrame() series = df["A"] - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["series"] = series recons = store["series"] tm.assert_series_equal(recons, series) @@ -2561,7 +2562,7 @@ def test_store_series_name(self): @pytest.mark.parametrize( "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)] ) - def test_store_mixed(self, compression): + def test_store_mixed(self, compression, setup_path): def _make_one(): df = tm.makeDataFrame() df["obj1"] = "foo" @@ -2575,10 +2576,10 @@ def _make_one(): df1 = _make_one() df2 = _make_one() - self._check_roundtrip(df1, tm.assert_frame_equal) - self._check_roundtrip(df2, tm.assert_frame_equal) + self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["obj"] = df1 tm.assert_frame_equal(store["obj"], df1) store["obj"] = df2 @@ -2586,25 +2587,34 @@ def _make_one(): # check that can store Series of all of these types self._check_roundtrip( - df1["obj1"], tm.assert_series_equal, compression=compression + df1["obj1"], + tm.assert_series_equal, + path=setup_path, + compression=compression, ) self._check_roundtrip( - df1["bool1"], tm.assert_series_equal, compression=compression + df1["bool1"], + tm.assert_series_equal, + path=setup_path, + compression=compression, ) self._check_roundtrip( - df1["int1"], tm.assert_series_equal, compression=compression + df1["int1"], + tm.assert_series_equal, + path=setup_path, + compression=compression, ) @pytest.mark.filterwarnings( "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning" ) - def test_select_with_dups(self): + def test_select_with_dups(self, setup_path): # single dtypes df = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]) df.index = date_range("20130101 9:30", periods=10, freq="T") - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df) result = store.select("df") @@ -2631,7 +2641,7 @@ def test_select_with_dups(self): ) df.index = date_range("20130101 9:30", periods=10, freq="T") - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df) result = store.select("df") @@ -2651,7 +2661,7 @@ def test_select_with_dups(self): assert_frame_equal(result, expected, by_blocks=True) # duplicates on both index and columns - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df) store.append("df", df) @@ -2660,18 +2670,18 @@ def test_select_with_dups(self): result = store.select("df", columns=["B", "A"]) assert_frame_equal(result, expected, by_blocks=True) - def test_overwrite_node(self): + def test_overwrite_node(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeDataFrame() ts = tm.makeTimeSeries() store["a"] = ts tm.assert_series_equal(store["a"], ts) - def test_select(self): + def test_select(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: with catch_warnings(record=True): @@ -2709,9 +2719,9 @@ def test_select(self): expected = df[df.A > 0].reindex(columns=["C", "D"]) tm.assert_frame_equal(expected, result) - def test_select_dtypes(self): + def test_select_dtypes(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # with a Timestamp data column (GH #2637) df = DataFrame( dict(ts=bdate_range("2012-01-01", periods=300), A=np.random.randn(300)) @@ -2767,7 +2777,7 @@ def test_select_dtypes(self): expected = df.reindex(index=list(df.index)[0:10], columns=["A"]) tm.assert_frame_equal(expected, result) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # floats w/o NaN df = DataFrame(dict(cols=range(11), values=range(11)), dtype="float64") @@ -2806,7 +2816,7 @@ def test_select_dtypes(self): # test selection with comparison against numpy scalar # GH 11283 - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeDataFrame() expected = df[df["A"] > 0] @@ -2816,9 +2826,9 @@ def test_select_dtypes(self): result = store.select("df", where=["A>np_zero"]) tm.assert_frame_equal(expected, result) - def test_select_with_many_inputs(self): + def test_select_with_many_inputs(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame( dict( @@ -2869,10 +2879,10 @@ def test_select_with_many_inputs(self): tm.assert_frame_equal(expected, result) assert len(result) == 100 - def test_select_iterator(self): + def test_select_iterator(self, setup_path): # single table - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeTimeDataFrame(500) _maybe_remove(store, "df") @@ -2893,7 +2903,7 @@ def test_select_iterator(self): result = concat(results) tm.assert_frame_equal(result, expected) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeTimeDataFrame(500) df.to_hdf(path, "df_non_table") @@ -2904,7 +2914,7 @@ def test_select_iterator(self): with pytest.raises(TypeError): read_hdf(path, "df_non_table", iterator=True) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeTimeDataFrame(500) df.to_hdf(path, "df", format="table") @@ -2918,7 +2928,7 @@ def test_select_iterator(self): # multiple - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df1 = tm.makeTimeDataFrame(500) store.append("df1", df1, data_columns=True) @@ -2939,14 +2949,14 @@ def test_select_iterator(self): result = concat(results) tm.assert_frame_equal(expected, result) - def test_select_iterator_complete_8014(self): + def test_select_iterator_complete_8014(self, setup_path): # GH 8014 # using iterator and where clause chunksize = 1e4 # no iterator - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: expected = tm.makeTimeDataFrame(100064, "S") _maybe_remove(store, "df") @@ -2980,7 +2990,7 @@ def test_select_iterator_complete_8014(self): tm.assert_frame_equal(expected, result) # with iterator, full range - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: expected = tm.makeTimeDataFrame(100064, "S") _maybe_remove(store, "df") @@ -3014,14 +3024,14 @@ def test_select_iterator_complete_8014(self): result = concat(results) tm.assert_frame_equal(expected, result) - def test_select_iterator_non_complete_8014(self): + def test_select_iterator_non_complete_8014(self, setup_path): # GH 8014 # using iterator and where clause chunksize = 1e4 # with iterator, non complete range - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: expected = tm.makeTimeDataFrame(100064, "S") _maybe_remove(store, "df") @@ -3056,7 +3066,7 @@ def test_select_iterator_non_complete_8014(self): tm.assert_frame_equal(rexpected, result) # with iterator, empty where - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: expected = tm.makeTimeDataFrame(100064, "S") _maybe_remove(store, "df") @@ -3069,7 +3079,7 @@ def test_select_iterator_non_complete_8014(self): results = [s for s in store.select("df", where=where, chunksize=chunksize)] assert 0 == len(results) - def test_select_iterator_many_empty_frames(self): + def test_select_iterator_many_empty_frames(self, setup_path): # GH 8014 # using iterator and where clause can return many empty @@ -3077,7 +3087,7 @@ def test_select_iterator_many_empty_frames(self): chunksize = int(1e4) # with iterator, range limited to the first chunk - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: expected = tm.makeTimeDataFrame(100000, "S") _maybe_remove(store, "df") @@ -3134,14 +3144,14 @@ def test_select_iterator_many_empty_frames(self): @pytest.mark.filterwarnings( "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning" ) - def test_retain_index_attributes(self): + def test_retain_index_attributes(self, setup_path): # GH 3499, losing frequency info on index recreation df = DataFrame( dict(A=Series(range(3), index=date_range("2000-1-1", periods=3, freq="H"))) ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: _maybe_remove(store, "data") store.put("data", df, format="table") @@ -3194,8 +3204,8 @@ def test_retain_index_attributes(self): @pytest.mark.filterwarnings( "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning" ) - def test_retain_index_attributes2(self): - with ensure_clean_path(self.path) as path: + def test_retain_index_attributes2(self, setup_path): + with ensure_clean_path(setup_path) as path: with catch_warnings(record=True): @@ -3232,11 +3242,11 @@ def test_retain_index_attributes2(self): assert read_hdf(path, "data").index.name is None - def test_frame_select(self): + def test_frame_select(self, setup_path): df = tm.makeTimeDataFrame() - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("frame", df, format="table") date = df.index[len(df) // 2] @@ -3265,14 +3275,14 @@ def test_frame_select(self): # with pytest.raises(ValueError): # store.select('frame', [crit1, crit2]) - def test_frame_select_complex(self): + def test_frame_select_complex(self, setup_path): # select via complex criteria df = tm.makeTimeDataFrame() df["string"] = "foo" df.loc[df.index[0:4], "string"] = "bar" - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df", df, format="table", data_columns=["string"]) # empty @@ -3317,7 +3327,7 @@ def test_frame_select_complex(self): expected = df.loc[df.index > df.index[3]].reindex(columns=["A", "B"]) tm.assert_frame_equal(result, expected) - def test_frame_select_complex2(self): + def test_frame_select_complex2(self, setup_path): with ensure_clean_path(["parms.hdf", "hist.hdf"]) as paths: @@ -3381,13 +3391,13 @@ def test_frame_select_complex2(self): store.close() - def test_invalid_filtering(self): + def test_invalid_filtering(self, setup_path): # can't use more than one filter (atm) df = tm.makeTimeDataFrame() - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df", df, format="table") # not implemented @@ -3398,9 +3408,9 @@ def test_invalid_filtering(self): with pytest.raises(NotImplementedError): store.select("df", "columns=['A','B'] & columns=['C']") - def test_string_select(self): + def test_string_select(self, setup_path): # GH 2973 - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeTimeDataFrame() @@ -3440,11 +3450,11 @@ def test_string_select(self): expected = df[df.int != 2] assert_frame_equal(result, expected) - def test_read_column(self): + def test_read_column(self, setup_path): df = tm.makeTimeDataFrame() - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: _maybe_remove(store, "df") # GH 17912 @@ -3513,10 +3523,10 @@ def test_read_column(self): result = store.select_column("df4", "B") tm.assert_series_equal(result, expected) - def test_coordinates(self): + def test_coordinates(self, setup_path): df = tm.makeTimeDataFrame() - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: _maybe_remove(store, "df") store.append("df", df) @@ -3561,7 +3571,7 @@ def test_coordinates(self): tm.assert_frame_equal(result, expected) # pass array/mask as the coordinates - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame( np.random.randn(1000, 2), index=date_range("20000101", periods=1000) @@ -3617,13 +3627,13 @@ def test_coordinates(self): expected = df[5:10] tm.assert_frame_equal(result, expected) - def test_append_to_multiple(self): + def test_append_to_multiple(self, setup_path): df1 = tm.makeTimeDataFrame() df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) df2["foo"] = "bar" df = concat([df1, df2], axis=1) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # exceptions with pytest.raises(ValueError): @@ -3647,13 +3657,13 @@ def test_append_to_multiple(self): expected = df[(df.A > 0) & (df.B > 0)] tm.assert_frame_equal(result, expected) - def test_append_to_multiple_dropna(self): + def test_append_to_multiple_dropna(self, setup_path): df1 = tm.makeTimeDataFrame() df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan df = concat([df1, df2], axis=1) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # dropna=True should guarantee rows are synchronized store.append_to_multiple( @@ -3667,13 +3677,13 @@ def test_append_to_multiple_dropna(self): @pytest.mark.xfail( run=False, reason="append_to_multiple_dropna_false is not raising as failed" ) - def test_append_to_multiple_dropna_false(self): + def test_append_to_multiple_dropna_false(self, setup_path): df1 = tm.makeTimeDataFrame() df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan df = concat([df1, df2], axis=1) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # dropna=False shouldn't synchronize row indexes store.append_to_multiple( @@ -3685,13 +3695,13 @@ def test_append_to_multiple_dropna_false(self): assert not store.select("df1a").index.equals(store.select("df2a").index) - def test_select_as_multiple(self): + def test_select_as_multiple(self, setup_path): df1 = tm.makeTimeDataFrame() df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) df2["foo"] = "bar" - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # no tables stored with pytest.raises(Exception): @@ -3759,9 +3769,9 @@ def test_select_as_multiple(self): LooseVersion(tables.__version__) < LooseVersion("3.1.0"), reason=("tables version does not support fix for nan selection bug: GH 4858"), ) - def test_nan_selection_bug_4858(self): + def test_nan_selection_bug_4858(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame(dict(cols=range(6), values=range(6)), dtype="float64") df["cols"] = (df["cols"] + 10).apply(str) @@ -3777,9 +3787,9 @@ def test_nan_selection_bug_4858(self): result = store.select("df", where="values>2.0") assert_frame_equal(result, expected) - def test_start_stop_table(self): + def test_start_stop_table(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # table df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20))) @@ -3795,10 +3805,10 @@ def test_start_stop_table(self): expected = df.loc[30:40, ["A"]] tm.assert_frame_equal(result, expected) - def test_start_stop_multiple(self): + def test_start_stop_multiple(self, setup_path): # GH 16209 - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame({"foo": [1, 2], "bar": [1, 2]}) @@ -3811,9 +3821,9 @@ def test_start_stop_multiple(self): expected = df.loc[[0], ["foo", "bar"]] tm.assert_frame_equal(result, expected) - def test_start_stop_fixed(self): + def test_start_stop_fixed(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # fixed, GH 8287 df = DataFrame( @@ -3851,13 +3861,13 @@ def test_start_stop_fixed(self): df.iloc[3:5, 1:3] = np.nan df.iloc[8:10, -2] = np.nan - def test_select_filter_corner(self): + def test_select_filter_corner(self, setup_path): df = DataFrame(np.random.randn(50, 100)) df.index = ["{c:3d}".format(c=c) for c in df.index] df.columns = ["{c:3d}".format(c=c) for c in df.columns] - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("frame", df, format="table") crit = "columns=df.columns[:75]" @@ -3868,7 +3878,7 @@ def test_select_filter_corner(self): result = store.select("frame", [crit]) tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]]) - def test_path_pathlib(self): + def test_path_pathlib(self, setup_path): df = tm.makeDataFrame() result = tm.round_trip_pathlib( @@ -3877,7 +3887,7 @@ def test_path_pathlib(self): tm.assert_frame_equal(df, result) @pytest.mark.parametrize("start, stop", [(0, 2), (1, 2), (None, None)]) - def test_contiguous_mixed_data_table(self, start, stop): + def test_contiguous_mixed_data_table(self, start, stop, setup_path): # GH 17021 # ValueError when reading a contiguous mixed-data table ft. VLArray df = DataFrame( @@ -3887,13 +3897,13 @@ def test_contiguous_mixed_data_table(self, start, stop): } ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("test_dataset", df) result = store.select("test_dataset", start=start, stop=stop) assert_frame_equal(df[start:stop], result) - def test_path_pathlib_hdfstore(self): + def test_path_pathlib_hdfstore(self, setup_path): df = tm.makeDataFrame() def writer(path): @@ -3907,14 +3917,14 @@ def reader(path): result = tm.round_trip_pathlib(writer, reader) tm.assert_frame_equal(df, result) - def test_pickle_path_localpath(self): + def test_pickle_path_localpath(self, setup_path): df = tm.makeDataFrame() result = tm.round_trip_pathlib( lambda p: df.to_hdf(p, "df"), lambda p: pd.read_hdf(p, "df") ) tm.assert_frame_equal(df, result) - def test_path_localpath_hdfstore(self): + def test_path_localpath_hdfstore(self, setup_path): df = tm.makeDataFrame() def writer(path): @@ -3928,23 +3938,25 @@ def reader(path): result = tm.round_trip_localpath(writer, reader) tm.assert_frame_equal(df, result) - def _check_roundtrip(self, obj, comparator, compression=False, **kwargs): + def _check_roundtrip(self, obj, comparator, path, compression=False, **kwargs): options = {} if compression: options["complib"] = _default_compressor - with ensure_clean_store(self.path, "w", **options) as store: + with ensure_clean_store(path, "w", **options) as store: store["obj"] = obj retrieved = store["obj"] comparator(retrieved, obj, **kwargs) - def _check_double_roundtrip(self, obj, comparator, compression=False, **kwargs): + def _check_double_roundtrip( + self, obj, comparator, path, compression=False, **kwargs + ): options = {} if compression: options["complib"] = compression or _default_compressor - with ensure_clean_store(self.path, "w", **options) as store: + with ensure_clean_store(path, "w", **options) as store: store["obj"] = obj retrieved = store["obj"] comparator(retrieved, obj, **kwargs) @@ -3952,21 +3964,21 @@ def _check_double_roundtrip(self, obj, comparator, compression=False, **kwargs): again = store["obj"] comparator(again, obj, **kwargs) - def _check_roundtrip_table(self, obj, comparator, compression=False): + def _check_roundtrip_table(self, obj, comparator, path, compression=False): options = {} if compression: options["complib"] = _default_compressor - with ensure_clean_store(self.path, "w", **options) as store: + with ensure_clean_store(path, "w", **options) as store: store.put("obj", obj, format="table") retrieved = store["obj"] comparator(retrieved, obj) - def test_multiple_open_close(self): + def test_multiple_open_close(self, setup_path): # gh-4409: open & close multiple times - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() df.to_hdf(path, "df", mode="w", format="table") @@ -3980,7 +3992,7 @@ def test_multiple_open_close(self): assert "CLOSED" in store.info() assert not store.is_open - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: if pytables._table_file_open_policy_is_strict: @@ -4042,7 +4054,7 @@ def test_multiple_open_close(self): assert not store2.is_open # ops on a closed store - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() df.to_hdf(path, "df", mode="w", format="table") @@ -4086,7 +4098,7 @@ def test_multiple_open_close(self): with pytest.raises(ClosedFileError, match="file is not open"): store.select("df") - def test_pytables_native_read(self, datapath): + def test_pytables_native_read(self, datapath, setup_path): with ensure_clean_store( datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r" ) as store: @@ -4096,7 +4108,7 @@ def test_pytables_native_read(self, datapath): @pytest.mark.skipif( is_platform_windows(), reason="native2 read fails oddly on windows" ) - def test_pytables_native2_read(self, datapath): + def test_pytables_native2_read(self, datapath, setup_path): with ensure_clean_store( datapath("io", "data", "legacy_hdf", "pytables_native2.h5"), mode="r" ) as store: @@ -4105,7 +4117,7 @@ def test_pytables_native2_read(self, datapath): assert isinstance(d1, DataFrame) @xfail_non_writeable - def test_legacy_table_fixed_format_read_py2(self, datapath): + def test_legacy_table_fixed_format_read_py2(self, datapath, setup_path): # GH 24510 # legacy table with fixed format written in Python 2 with ensure_clean_store( @@ -4119,7 +4131,7 @@ def test_legacy_table_fixed_format_read_py2(self, datapath): ) assert_frame_equal(expected, result) - def test_legacy_table_read_py2(self, datapath): + def test_legacy_table_read_py2(self, datapath, setup_path): # issue: 24925 # legacy table written in Python 2 with ensure_clean_store( @@ -4130,7 +4142,7 @@ def test_legacy_table_read_py2(self, datapath): expected = pd.DataFrame({"a": ["a", "b"], "b": [2, 3]}) assert_frame_equal(expected, result) - def test_copy(self): + def test_copy(self, setup_path): with catch_warnings(record=True): @@ -4179,7 +4191,7 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): df = tm.makeDataFrame() try: - path = create_tempfile(self.path) + path = create_tempfile(setup_path) st = HDFStore(path) st.append("df", df, data_columns=["A"]) st.close() @@ -4188,17 +4200,17 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): finally: safe_remove(path) - def test_store_datetime_fractional_secs(self): + def test_store_datetime_fractional_secs(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456) series = Series([0], [dt]) store["a"] = series assert store["a"].index[0] == dt - def test_tseries_indices_series(self): + def test_tseries_indices_series(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: idx = tm.makeDateIndex(10) ser = Series(np.random.randn(len(idx)), idx) store["a"] = ser @@ -4217,9 +4229,9 @@ def test_tseries_indices_series(self): assert result.index.freq == ser.index.freq tm.assert_class_equal(result.index, ser.index, obj="series index") - def test_tseries_indices_frame(self): + def test_tseries_indices_frame(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: idx = tm.makeDateIndex(10) df = DataFrame(np.random.randn(len(idx), 3), index=idx) store["a"] = df @@ -4238,7 +4250,7 @@ def test_tseries_indices_frame(self): assert result.index.freq == df.index.freq tm.assert_class_equal(result.index, df.index, obj="dataframe index") - def test_unicode_index(self): + def test_unicode_index(self, setup_path): unicode_values = ["\u03c3", "\u03c3\u03c3"] @@ -4246,30 +4258,30 @@ def test_unicode_index(self): with catch_warnings(record=True): simplefilter("ignore", pd.errors.PerformanceWarning) s = Series(np.random.randn(len(unicode_values)), unicode_values) - self._check_roundtrip(s, tm.assert_series_equal) + self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) - def test_unicode_longer_encoded(self): + def test_unicode_longer_encoded(self, setup_path): # GH 11234 char = "\u0394" df = pd.DataFrame({"A": [char]}) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df", df, format="table", encoding="utf-8") result = store.get("df") tm.assert_frame_equal(result, df) df = pd.DataFrame({"A": ["a", char], "B": ["b", "b"]}) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df", df, format="table", encoding="utf-8") result = store.get("df") tm.assert_frame_equal(result, df) @xfail_non_writeable - def test_store_datetime_mixed(self): + def test_store_datetime_mixed(self, setup_path): df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]}) ts = tm.makeTimeSeries() df["d"] = ts.index[:3] - self._check_roundtrip(df, tm.assert_frame_equal) + self._check_roundtrip(df, tm.assert_frame_equal, path=setup_path) # FIXME: don't leave commented-out code # def test_cant_write_multiindex_table(self): @@ -4281,14 +4293,14 @@ def test_store_datetime_mixed(self): # with pytest.raises(Exception): # store.put('foo', df, format='table') - def test_append_with_diff_col_name_types_raises_value_error(self): + def test_append_with_diff_col_name_types_raises_value_error(self, setup_path): df = DataFrame(np.random.randn(10, 1)) df2 = DataFrame({"a": np.random.randn(10)}) df3 = DataFrame({(1, 2): np.random.randn(10)}) df4 = DataFrame({("1", 2): np.random.randn(10)}) df5 = DataFrame({("1", 2, object): np.random.randn(10)}) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: name = "df_{}".format(tm.rands(10)) store.append(name, df) @@ -4296,7 +4308,7 @@ def test_append_with_diff_col_name_types_raises_value_error(self): with pytest.raises(ValueError): store.append(name, d) - def test_query_with_nested_special_character(self): + def test_query_with_nested_special_character(self, setup_path): df = DataFrame( { "a": ["a", "a", "c", "b", "test & test", "c", "b", "e"], @@ -4304,14 +4316,14 @@ def test_query_with_nested_special_character(self): } ) expected = df[df.a == "test & test"] - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("test", df, format="table", data_columns=True) result = store.select("test", 'a = "test & test"') tm.assert_frame_equal(expected, result) - def test_categorical(self): + def test_categorical(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # Basic _maybe_remove(store, "s") @@ -4429,7 +4441,7 @@ def test_categorical(self): ): store.select("df3/meta/s/meta") - def test_categorical_conversion(self): + def test_categorical_conversion(self, setup_path): # GH13322 # Check that read_hdf with categorical columns doesn't return rows if @@ -4443,7 +4455,7 @@ def test_categorical_conversion(self): # We are expecting an empty DataFrame matching types of df expected = df.iloc[[], :] - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", format="table", data_columns=True) result = read_hdf(path, "df", where="obsids=B") tm.assert_frame_equal(result, expected) @@ -4454,12 +4466,12 @@ def test_categorical_conversion(self): # We are expecting an empty DataFrame matching types of df expected = df.iloc[[], :] - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", format="table", data_columns=True) result = read_hdf(path, "df", where="obsids=B") tm.assert_frame_equal(result, expected) - def test_categorical_nan_only_columns(self): + def test_categorical_nan_only_columns(self, setup_path): # GH18413 # Check that read_hdf with categorical columns with NaN-only values can # be read back. @@ -4475,15 +4487,15 @@ def test_categorical_nan_only_columns(self): df["b"] = df.b.astype("category") df["d"] = df.b.astype("category") expected = df - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", format="table", data_columns=True) result = read_hdf(path, "df") tm.assert_frame_equal(result, expected) - def test_duplicate_column_name(self): + def test_duplicate_column_name(self, setup_path): df = DataFrame(columns=["a", "a"], data=[[0, 0]]) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: with pytest.raises(ValueError): df.to_hdf(path, "df", format="fixed") @@ -4494,30 +4506,30 @@ def test_duplicate_column_name(self): assert df.equals(other) assert other.equals(df) - def test_round_trip_equals(self): + def test_round_trip_equals(self, setup_path): # GH 9330 df = DataFrame({"B": [1, 2], "A": ["x", "y"]}) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", format="table") other = read_hdf(path, "df") tm.assert_frame_equal(df, other) assert df.equals(other) assert other.equals(df) - def test_preserve_timedeltaindex_type(self): + def test_preserve_timedeltaindex_type(self, setup_path): # GH9635 # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve # the type of the index. df = DataFrame(np.random.normal(size=(10, 5))) df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example") - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["df"] = df assert_frame_equal(store["df"], df) - def test_columns_multiindex_modified(self): + def test_columns_multiindex_modified(self, setup_path): # BUG: 7212 # read_hdf store.select modified the passed columns parameters # when multi-indexed. @@ -4527,7 +4539,7 @@ def test_columns_multiindex_modified(self): df = df.set_index(keys="E", append=True) data_columns = df.index.names + df.columns.tolist() - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf( path, "df", @@ -4542,7 +4554,7 @@ def test_columns_multiindex_modified(self): assert cols2load_original == cols2load @ignore_natural_naming_warning - def test_to_hdf_with_object_column_names(self): + def test_to_hdf_with_object_column_names(self, setup_path): # GH9057 # Writing HDF5 table format should only work for string-like # column types @@ -4562,7 +4574,7 @@ def test_to_hdf_with_object_column_names(self): for index in types_should_fail: df = DataFrame(np.random.randn(10, 2), columns=index(2)) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: with catch_warnings(record=True): msg = "cannot have non-object label DataIndexableCol" with pytest.raises(ValueError, match=msg): @@ -4570,7 +4582,7 @@ def test_to_hdf_with_object_column_names(self): for index in types_should_run: df = DataFrame(np.random.randn(10, 2), columns=index(2)) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: with catch_warnings(record=True): df.to_hdf(path, "df", format="table", data_columns=True) result = pd.read_hdf( @@ -4578,14 +4590,14 @@ def test_to_hdf_with_object_column_names(self): ) assert len(result) - def test_read_hdf_open_store(self): + def test_read_hdf_open_store(self, setup_path): # GH10330 # No check for non-string path_or-buf, and no test of open store df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) df.index.name = "letters" df = df.set_index(keys="E", append=True) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", mode="w") direct = read_hdf(path, "df") store = HDFStore(path, mode="r") @@ -4594,12 +4606,12 @@ def test_read_hdf_open_store(self): assert store.is_open store.close() - def test_read_hdf_iterator(self): + def test_read_hdf_iterator(self, setup_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) df.index.name = "letters" df = df.set_index(keys="E", append=True) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", mode="w", format="t") direct = read_hdf(path, "df") iterator = read_hdf(path, "df", iterator=True) @@ -4608,10 +4620,10 @@ def test_read_hdf_iterator(self): tm.assert_frame_equal(direct, indirect) iterator.store.close() - def test_read_hdf_errors(self): + def test_read_hdf_errors(self, setup_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: with pytest.raises(IOError): read_hdf(path, "key") @@ -4626,20 +4638,20 @@ def test_read_hdf_generic_buffer_errors(self): with pytest.raises(NotImplementedError): read_hdf(BytesIO(b""), "df") - def test_invalid_complib(self): + def test_invalid_complib(self, setup_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: with pytest.raises(ValueError): df.to_hdf(path, "df", complib="foolib") # GH10443 - def test_read_nokey(self): + def test_read_nokey(self, setup_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) # Categorical dtype not supported for "fixed" format. So no need # to test with that dtype in the dataframe here. - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", mode="a") reread = read_hdf(path) assert_frame_equal(df, reread) @@ -4648,11 +4660,11 @@ def test_read_nokey(self): with pytest.raises(ValueError): read_hdf(path) - def test_read_nokey_table(self): + def test_read_nokey_table(self, setup_path): # GH13231 df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")}) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", mode="a", format="table") reread = read_hdf(path) assert_frame_equal(df, reread) @@ -4661,8 +4673,8 @@ def test_read_nokey_table(self): with pytest.raises(ValueError): read_hdf(path) - def test_read_nokey_empty(self): - with ensure_clean_path(self.path) as path: + def test_read_nokey_empty(self, setup_path): + with ensure_clean_path(setup_path) as path: store = HDFStore(path) store.close() @@ -4670,7 +4682,7 @@ def test_read_nokey_empty(self): read_hdf(path) @td.skip_if_no("pathlib") - def test_read_from_pathlib_path(self): + def test_read_from_pathlib_path(self, setup_path): # GH11773 from pathlib import Path @@ -4678,7 +4690,7 @@ def test_read_from_pathlib_path(self): expected = DataFrame( np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") ) - with ensure_clean_path(self.path) as filename: + with ensure_clean_path(setup_path) as filename: path_obj = Path(filename) expected.to_hdf(path_obj, "df", mode="a") @@ -4687,7 +4699,7 @@ def test_read_from_pathlib_path(self): tm.assert_frame_equal(expected, actual) @td.skip_if_no("py.path") - def test_read_from_py_localpath(self): + def test_read_from_py_localpath(self, setup_path): # GH11773 from py.path import local as LocalPath @@ -4695,7 +4707,7 @@ def test_read_from_py_localpath(self): expected = DataFrame( np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") ) - with ensure_clean_path(self.path) as filename: + with ensure_clean_path(setup_path) as filename: path_obj = LocalPath(filename) expected.to_hdf(path_obj, "df", mode="a") @@ -4703,11 +4715,11 @@ def test_read_from_py_localpath(self): tm.assert_frame_equal(expected, actual) - def test_query_long_float_literal(self): + def test_query_long_float_literal(self, setup_path): # GH 14241 df = pd.DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]}) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("test", df, format="table", data_columns=True) cutoff = 1000000000.0006 @@ -4724,7 +4736,7 @@ def test_query_long_float_literal(self): expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) - def test_query_compare_column_type(self): + def test_query_compare_column_type(self, setup_path): # GH 15492 df = pd.DataFrame( { @@ -4736,7 +4748,7 @@ def test_query_compare_column_type(self): columns=["date", "real_date", "float", "int"], ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("test", df, format="table", data_columns=True) ts = pd.Timestamp("2014-01-01") # noqa @@ -4773,12 +4785,12 @@ def test_query_compare_column_type(self): tm.assert_frame_equal(expected, result) @pytest.mark.parametrize("format", ["fixed", "table"]) - def test_read_hdf_series_mode_r(self, format): + def test_read_hdf_series_mode_r(self, format, setup_path): # GH 16583 # Tests that reading a Series saved to an HDF file # still works if a mode='r' argument is supplied series = tm.makeFloatSeries() - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: series.to_hdf(path, key="data", format=format) result = pd.read_hdf(path, key="data", mode="r") tm.assert_series_equal(result, series) @@ -4836,26 +4848,26 @@ def test_select_empty_where(self, where): CategoricalIndex(list("abc")), ], ) - def test_to_hdf_multiindex_extension_dtype(self, idx): + def test_to_hdf_multiindex_extension_dtype(self, idx, setup_path): # GH 7775 mi = MultiIndex.from_arrays([idx, idx]) df = pd.DataFrame(0, index=mi, columns=["a"]) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: with pytest.raises(NotImplementedError, match="Saving a MultiIndex"): df.to_hdf(path, "df") -class TestHDFComplexValues(Base): +class TestHDFComplexValues: # GH10447 - def test_complex_fixed(self): + def test_complex_fixed(self, setup_path): df = DataFrame( np.random.rand(4, 5).astype(np.complex64), index=list("abcd"), columns=list("ABCDE"), ) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df") reread = read_hdf(path, "df") assert_frame_equal(df, reread) @@ -4865,19 +4877,19 @@ def test_complex_fixed(self): index=list("abcd"), columns=list("ABCDE"), ) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df") reread = read_hdf(path, "df") assert_frame_equal(df, reread) - def test_complex_table(self): + def test_complex_table(self, setup_path): df = DataFrame( np.random.rand(4, 5).astype(np.complex64), index=list("abcd"), columns=list("ABCDE"), ) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", format="table") reread = read_hdf(path, "df") assert_frame_equal(df, reread) @@ -4888,13 +4900,13 @@ def test_complex_table(self): columns=list("ABCDE"), ) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", format="table", mode="w") reread = read_hdf(path, "df") assert_frame_equal(df, reread) @xfail_non_writeable - def test_complex_mixed_fixed(self): + def test_complex_mixed_fixed(self, setup_path): complex64 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 ) @@ -4911,12 +4923,12 @@ def test_complex_mixed_fixed(self): }, index=list("abcd"), ) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df") reread = read_hdf(path, "df") assert_frame_equal(df, reread) - def test_complex_mixed_table(self): + def test_complex_mixed_table(self, setup_path): complex64 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 ) @@ -4934,17 +4946,17 @@ def test_complex_mixed_table(self): index=list("abcd"), ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df, data_columns=["A", "B"]) result = store.select("df", where="A>2") assert_frame_equal(df.loc[df.A > 2], result) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", format="table") reread = read_hdf(path, "df") assert_frame_equal(df, reread) - def test_complex_across_dimensions_fixed(self): + def test_complex_across_dimensions_fixed(self, setup_path): with catch_warnings(record=True): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list("abcd")) @@ -4953,12 +4965,12 @@ def test_complex_across_dimensions_fixed(self): objs = [s, df] comps = [tm.assert_series_equal, tm.assert_frame_equal] for obj, comp in zip(objs, comps): - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: obj.to_hdf(path, "obj", format="fixed") reread = read_hdf(path, "obj") comp(obj, reread) - def test_complex_across_dimensions(self): + def test_complex_across_dimensions(self, setup_path): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list("abcd")) df = DataFrame({"A": s, "B": s}) @@ -4968,12 +4980,12 @@ def test_complex_across_dimensions(self): objs = [df] comps = [tm.assert_frame_equal] for obj, comp in zip(objs, comps): - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: obj.to_hdf(path, "obj", format="table") reread = read_hdf(path, "obj") comp(obj, reread) - def test_complex_indexing_error(self): + def test_complex_indexing_error(self, setup_path): complex128 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 ) @@ -4981,36 +4993,37 @@ def test_complex_indexing_error(self): {"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128}, index=list("abcd"), ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: with pytest.raises(TypeError): store.append("df", df, data_columns=["C"]) - def test_complex_series_error(self): + def test_complex_series_error(self, setup_path): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list("abcd")) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: with pytest.raises(TypeError): s.to_hdf(path, "obj", format="t") - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: s.to_hdf(path, "obj", format="t", index=False) reread = read_hdf(path, "obj") tm.assert_series_equal(s, reread) - def test_complex_append(self): + def test_complex_append(self, setup_path): df = DataFrame( {"a": np.random.randn(100).astype(np.complex128), "b": np.random.randn(100)} ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df, data_columns=["b"]) store.append("df", df) result = store.select("df") assert_frame_equal(pd.concat([df, df], 0), result) -class TestTimezones(Base): +# @pytest.mark.usefixtures("setup_path") +class TestTimezones: def _compare_with_tz(self, a, b): tm.assert_frame_equal(a, b) @@ -5024,7 +5037,7 @@ def _compare_with_tz(self, a, b): "invalid tz comparison [{a_e}] [{b_e}]".format(a_e=a_e, b_e=b_e) ) - def test_append_with_timezones_dateutil(self): + def test_append_with_timezones_dateutil(self, setup_path): from datetime import timedelta @@ -5035,7 +5048,7 @@ def test_append_with_timezones_dateutil(self): gettz = lambda x: maybe_get_tz("dateutil/" + x) # as columns - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: _maybe_remove(store, "df_tz") df = DataFrame( @@ -5101,7 +5114,7 @@ def test_append_with_timezones_dateutil(self): store.append("df_tz", df) # as index - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # GH 4098 example df = DataFrame( @@ -5125,12 +5138,12 @@ def test_append_with_timezones_dateutil(self): result = store.select("df") assert_frame_equal(result, df) - def test_append_with_timezones_pytz(self): + def test_append_with_timezones_pytz(self, setup_path): from datetime import timedelta # as columns - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: _maybe_remove(store, "df_tz") df = DataFrame( @@ -5195,7 +5208,7 @@ def test_append_with_timezones_pytz(self): store.append("df_tz", df) # as index - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # GH 4098 example df = DataFrame( @@ -5219,7 +5232,7 @@ def test_append_with_timezones_pytz(self): result = store.select("df") assert_frame_equal(result, df) - def test_tseries_select_index_column(self): + def test_tseries_select_index_column(self, setup_path): # GH7777 # selecting a UTC datetimeindex column did # not preserve UTC tzinfo set before storing @@ -5228,7 +5241,7 @@ def test_tseries_select_index_column(self): rng = date_range("1/1/2000", "1/30/2000") frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("frame", frame) result = store.select_column("frame", "index") assert rng.tz == DatetimeIndex(result.values).tz @@ -5237,7 +5250,7 @@ def test_tseries_select_index_column(self): rng = date_range("1/1/2000", "1/30/2000", tz="UTC") frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("frame", frame) result = store.select_column("frame", "index") assert rng.tz == result.dt.tz @@ -5246,13 +5259,13 @@ def test_tseries_select_index_column(self): rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("frame", frame) result = store.select_column("frame", "index") assert rng.tz == result.dt.tz - def test_timezones_fixed(self): - with ensure_clean_store(self.path) as store: + def test_timezones_fixed(self, setup_path): + with ensure_clean_store(setup_path) as store: # index rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") @@ -5277,24 +5290,24 @@ def test_timezones_fixed(self): result = store["df"] assert_frame_equal(result, df) - def test_fixed_offset_tz(self): + def test_fixed_offset_tz(self, setup_path): rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00") frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["frame"] = frame recons = store["frame"] tm.assert_index_equal(recons.index, rng) assert rng.tz == recons.index.tz @td.skip_if_windows - def test_store_timezone(self): + def test_store_timezone(self, setup_path): # GH2852 # issue storing datetime.date with a timezone as it resets when read # back in a new timezone # original method - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: today = datetime.date(2013, 9, 10) df = DataFrame([1, 2, 3], index=[today, today, today]) @@ -5303,7 +5316,7 @@ def test_store_timezone(self): assert_frame_equal(result, df) # with tz setting - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: with set_timezone("EST5EDT"): today = datetime.date(2013, 9, 10) @@ -5315,7 +5328,7 @@ def test_store_timezone(self): assert_frame_equal(result, df) - def test_legacy_datetimetz_object(self, datapath): + def test_legacy_datetimetz_object(self, datapath, setup_path): # legacy from < 0.17.0 # 8260 expected = DataFrame( @@ -5331,9 +5344,9 @@ def test_legacy_datetimetz_object(self, datapath): result = store["df"] assert_frame_equal(result, expected) - def test_dst_transitions(self): + def test_dst_transitions(self, setup_path): # make sure we are not failing on transitions - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: times = pd.date_range( "2013-10-26 23:00", "2013-10-27 01:00", @@ -5349,7 +5362,7 @@ def test_dst_transitions(self): result = store.select("df") assert_frame_equal(result, df) - def test_read_with_where_tz_aware_index(self): + def test_read_with_where_tz_aware_index(self, setup_path): # GH 11926 periods = 10 dts = pd.date_range("20151201", periods=periods, freq="D", tz="UTC") @@ -5357,13 +5370,13 @@ def test_read_with_where_tz_aware_index(self): expected = pd.DataFrame({"MYCOL": 0}, index=mi) key = "mykey" - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: with pd.HDFStore(path) as store: store.append(key, expected, format="table", append=True) result = pd.read_hdf(path, key, where="DATE > 20151130") assert_frame_equal(result, expected) - def test_py2_created_with_datetimez(self, datapath): + def test_py2_created_with_datetimez(self, datapath, setup_path): # The test HDF5 file was created in Python 2, but could not be read in # Python 3. #