From d81fef8787d764eac267492248d81069bc75086e Mon Sep 17 00:00:00 2001 From: moink Date: Mon, 21 Dec 2020 13:22:44 +0100 Subject: [PATCH 1/4] TEST: GH30999 Add match=msg to all "with pytest.raises" in pandas/tests/io/pytables/test_store.py --- pandas/tests/io/pytables/test_store.py | 262 +++++++++++++++++-------- 1 file changed, 176 insertions(+), 86 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index b35414724d946..2f85b2517b0af 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -483,11 +483,12 @@ def test_mode(self, setup_path): def check(mode): + msg = r"[\S]* does not exist" with ensure_clean_path(setup_path) as path: # constructor if mode in ["r", "r+"]: - with pytest.raises(IOError): + with pytest.raises(IOError, match=msg): HDFStore(path, mode=mode) else: @@ -499,7 +500,7 @@ def check(mode): # context if mode in ["r", "r+"]: - with pytest.raises(IOError): + with pytest.raises(IOError, match=msg): with HDFStore(path, mode=mode) as store: pass else: @@ -510,7 +511,7 @@ def check(mode): # conv write if mode in ["r", "r+"]: - with pytest.raises(IOError): + with pytest.raises(IOError, match=msg): df.to_hdf(path, "df", mode=mode) df.to_hdf(path, "df", mode="w") else: @@ -549,8 +550,9 @@ def test_reopen_handle(self, setup_path): store = HDFStore(path, mode="a") store["a"] = tm.makeTimeSeries() + msg = r"Re-opening the file \[[\S]*\] with mode \[a\] will delete the current file!" 
# invalid mode change - with pytest.raises(PossibleDataLossError): + with pytest.raises(PossibleDataLossError, match=msg): store.open("w") store.close() @@ -713,7 +715,8 @@ def test_getattr(self, setup_path): # errors for x in ["d", "mode", "path", "handle", "complib"]: - with pytest.raises(AttributeError): + msg = f"'HDFStore' object has no attribute '{x}'" + with pytest.raises(AttributeError, match=msg): getattr(store, x) # not stores @@ -734,17 +737,18 @@ def test_put(self, setup_path): store.put("c", df[:10], format="table") # not OK, not a table - with pytest.raises(ValueError): + msg = "Can only append to Tables" + with pytest.raises(ValueError, match=msg): store.put("b", df[10:], append=True) # node does not currently exist, test _is_table_type returns False # in this case _maybe_remove(store, "f") - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.put("f", df[10:], append=True) # can't put to a table (use append instead) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.put("c", df[10:], append=True) # overwrite table @@ -787,7 +791,8 @@ def test_put_compression(self, setup_path): tm.assert_frame_equal(store["c"], df) # can't compress if format='fixed' - with pytest.raises(ValueError): + msg = "Compression not supported on Fixed format stores" + with pytest.raises(ValueError, match=msg): store.put("b", df, format="fixed", complib="zlib") @td.skip_if_windows_python_3 @@ -797,7 +802,8 @@ def test_put_compression_blosc(self, setup_path): with ensure_clean_store(setup_path) as store: # can't compress if format='fixed' - with pytest.raises(ValueError): + msg = "Compression not supported on Fixed format stores" + with pytest.raises(ValueError, match=msg): store.put("b", df, format="fixed", complib="blosc") store.put("c", df, format="table", complib="blosc") @@ -1329,7 +1335,9 @@ def test_append_frame_column_oriented(self, setup_path): tm.assert_frame_equal(expected, result) # this isn't 
supported - with pytest.raises(TypeError): + msg = re.escape("passing a filterable condition to a non-table indexer " + "[Filter: Not Initialized]") + with pytest.raises(TypeError, match=msg): store.select("df1", "columns=A and index>df.index[4]") def test_append_with_different_block_ordering(self, setup_path): @@ -1367,12 +1375,16 @@ def test_append_with_different_block_ordering(self, setup_path): # store additional fields in different blocks df["int16_2"] = Series([1] * len(df), dtype="int16") - with pytest.raises(ValueError): + msg = re.escape("cannot match existing table structure for [int16] on " + "appending data") + with pytest.raises(ValueError, match=msg): store.append("df", df) # store multiple additional fields in different blocks df["float_3"] = Series([1.0] * len(df), dtype="float64") - with pytest.raises(ValueError): + msg = re.escape("cannot match existing table structure for [A,B] on " + "appending data") + with pytest.raises(ValueError, match=msg): store.append("df", df) def test_append_with_strings(self, setup_path): @@ -1410,7 +1422,12 @@ def check_col(key, name, size): df_new = DataFrame( [[124, "abcdefqhij"], [346, "abcdefghijklmnopqrtsuvwxyz"]] ) - with pytest.raises(ValueError): + msg = (r"Trying to store a string with len \[26\] in " + r"\[values_block_1\] column but\n" + r"this column has a limit of \[15\]!\n" + "Consider using min_itemsize to preset the sizes on these " + "columns") + with pytest.raises(ValueError, match=msg): store.append("df_new", df_new) # min_itemsize on Series index (GH 11412) @@ -1488,7 +1505,9 @@ def check_col(key, name, size): # invalid min_itemsize keys df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"]) _maybe_remove(store, "df") - with pytest.raises(ValueError): + msg = re.escape("min_itemsize has the key [foo] which is not an axis or " + "data_column") + with pytest.raises(ValueError, match=msg): store.append("df", df, min_itemsize={"foo": 20, "foobar": 20}) def 
test_append_with_empty_string(self, setup_path): @@ -1718,7 +1737,8 @@ def col(t, column): # try to index a non-table _maybe_remove(store, "f2") store.put("f2", df) - with pytest.raises(TypeError): + msg = "cannot create table index on a Fixed format store" + with pytest.raises(TypeError, match=msg): store.create_table_index("f2") def test_create_table_index_data_columns_argument(self, setup_path): @@ -1800,9 +1820,13 @@ def test_column_multiindex(self, setup_path): store["df1"], expected, check_index_type=True, check_column_type=True ) - with pytest.raises(ValueError): + msg = re.escape("cannot use a multi-index on axis [1] with data_columns " + "['A']") + with pytest.raises(ValueError, match=msg): store.put("df2", df, format="table", data_columns=["A"]) - with pytest.raises(ValueError): + msg = re.escape("cannot use a multi-index on axis [1] with data_columns" + " True") + with pytest.raises(ValueError, match=msg): store.put("df3", df, format="table", data_columns=True) # appending multi-column on existing table (see GH 6167) @@ -1874,7 +1898,8 @@ def make_index(names=None): columns=["a", "b"], index=make_index(["date", "a", "t"]), ) - with pytest.raises(ValueError): + msg = "duplicate names/columns in the multi-index when storing as a table" + with pytest.raises(ValueError, match=msg): store.append("df", df) # dup within level @@ -1884,7 +1909,7 @@ def make_index(names=None): columns=["a", "b"], index=make_index(["date", "date", "date"]), ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.append("df", df) # fully names @@ -1945,9 +1970,13 @@ def test_pass_spec_to_storer(self, setup_path): with ensure_clean_store(setup_path) as store: store.put("df", df) - with pytest.raises(TypeError): + msg = ("cannot pass a column specification when reading a Fixed format " + "store. 
this store must be selected in its entirety") + with pytest.raises(TypeError, match=msg): store.select("df", columns=["A"]) - with pytest.raises(TypeError): + msg = ("cannot pass a where specification when reading from a Fixed " + "format store. this store must be selected in its entirety") + with pytest.raises(TypeError, match=msg): store.select("df", where=[("columns=A")]) def test_append_misc(self, setup_path): @@ -2010,13 +2039,14 @@ def test_append_raise(self, setup_path): df = tm.makeDataFrame() df["invalid"] = [["a"]] * len(df) assert df.dtypes["invalid"] == np.object_ - with pytest.raises(TypeError): + msg = re.escape("object of type 'int' has no len()") + with pytest.raises(TypeError, match=msg): store.append("df", df) # multiple invalid columns df["invalid2"] = [["a"]] * len(df) df["invalid3"] = [["a"]] * len(df) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): store.append("df", df) # datetime with embedded nans as object @@ -2026,15 +2056,19 @@ def test_append_raise(self, setup_path): s[0:5] = np.nan df["invalid"] = s assert df.dtypes["invalid"] == np.object_ - with pytest.raises(TypeError): + msg = "too many timezones in this block, create separate data columns" + with pytest.raises(TypeError, match=msg): store.append("df", df) # directly ndarray - with pytest.raises(TypeError): + msg = "value must be None, Series, or DataFrame" + with pytest.raises(TypeError, match=msg): store.append("df", np.arange(10)) # series directly - with pytest.raises(TypeError): + msg = re.escape("cannot properly create the storer for: " + "[group->df,value-><class 'pandas.core.series.Series'>]") + with pytest.raises(TypeError, match=msg): store.append("df", Series(np.arange(10))) # appending an incompatible table @@ -2042,7 +2076,10 @@ def test_append_raise(self, setup_path): store.append("df", df) df["foo"] = "foo" - with pytest.raises(ValueError): + msg = re.escape("invalid combination of [non_index_axes] on appending data " + "[(1, ['A', 'B', 'C', 'D', 'foo'])] vs current table 
" + "[(1, ['A', 'B', 'C', 'D'])]") + with pytest.raises(ValueError, match=msg): store.append("df", df) def test_table_index_incompatible_dtypes(self, setup_path): @@ -2051,7 +2088,8 @@ def test_table_index_incompatible_dtypes(self, setup_path): with ensure_clean_store(setup_path) as store: store.put("frame", df1, format="table") - with pytest.raises(TypeError): + msg = re.escape("incompatible kind in col [integer - datetime64]") + with pytest.raises(TypeError, match=msg): store.put("frame", df2, format="table", append=True) def test_table_values_dtypes_roundtrip(self, setup_path): @@ -2066,7 +2104,13 @@ def test_table_values_dtypes_roundtrip(self, setup_path): tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes) # incompatible dtype - with pytest.raises(ValueError): + msg = re.escape("invalid combination of [values_axes] on appending data " + "[name->values_block_0,cname->values_block_0," + "dtype->float64,kind->float,shape->(1, 3)] vs " + "current table [name->values_block_0," + "cname->values_block_0,dtype->int64,kind->integer," + "shape->None]") + with pytest.raises(ValueError, match=msg): store.append("df_i8", df1) # check creation/storage/retrieval of float32 (a bit hacky to @@ -2142,7 +2186,8 @@ def test_unimplemented_dtypes_table_columns(self, setup_path): for n, f in dtypes: df = tm.makeDataFrame() df[n] = f - with pytest.raises(TypeError): + msg = re.escape(f"[{n}] is not implemented as a table column") + with pytest.raises(TypeError, match=msg): store.append(f"df1_{n}", df) # frame @@ -2154,7 +2199,8 @@ def test_unimplemented_dtypes_table_columns(self, setup_path): with ensure_clean_store(setup_path) as store: # this fails because we have a date in the object block...... 
- with pytest.raises(TypeError): + msg = "object of type 'int' has no len()" + with pytest.raises(TypeError, match=msg): store.append("df_unimplemented", df) def test_calendar_roundtrip_issue(self, setup_path): @@ -2287,14 +2333,19 @@ def test_invalid_terms(self, setup_path): store.put("df", df, format="table") # some invalid terms - with pytest.raises(TypeError): + msg = re.escape("__init__() missing 1 required positional argument: " + "'where'") + with pytest.raises(TypeError, match=msg): Term() # more invalid - with pytest.raises(ValueError): + msg = re.escape("cannot process expression [df.index[3]], " + "[2000-01-06 00:00:00] is not a valid condition") + with pytest.raises(ValueError, match=msg): store.select("df", "df.index[3]") - with pytest.raises(SyntaxError): + msg = "invalid syntax" + with pytest.raises(SyntaxError, match=msg): store.select("df", "index>") # from the docs @@ -2321,7 +2372,12 @@ def test_invalid_terms(self, setup_path): ) dfq.to_hdf(path, "dfq", format="table") - with pytest.raises(ValueError): + msg = (r"The passed where expression: A>0 or C>0\n\s*" + r"contains an invalid variable reference\n\s*" + r"all of the variable references must be a reference to\n\s*" + r"an axis \(e.g. 
'index' or 'columns'\), or a data_column\n\s*" + r"The currently defined references are: index,columns\n") + with pytest.raises(ValueError, match=msg): read_hdf(path, "dfq", where="A>0 or C>0") def test_same_name_scoping(self, setup_path): @@ -2917,10 +2973,11 @@ def test_select_iterator(self, setup_path): df = tm.makeTimeDataFrame(500) df.to_hdf(path, "df_non_table") - with pytest.raises(TypeError): + msg = "can only use an iterator or chunksize on a table" + with pytest.raises(TypeError, match=msg): read_hdf(path, "df_non_table", chunksize=100) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): read_hdf(path, "df_non_table", iterator=True) with ensure_clean_path(setup_path) as path: @@ -3264,7 +3321,8 @@ def test_frame_select(self, setup_path): # invalid terms df = tm.makeTimeDataFrame() store.append("df_time", df) - with pytest.raises(ValueError): + msg = "could not convert string to Timestamp" + with pytest.raises(ValueError, match=msg): store.select("df_time", "index>0") # can't select if not written as table @@ -3311,7 +3369,8 @@ def test_frame_select_complex(self, setup_path): tm.assert_frame_equal(result, expected) # invert not implemented in numexpr :( - with pytest.raises(NotImplementedError): + msg = "cannot use an invert condition when passing to numexpr" + with pytest.raises(NotImplementedError, match=msg): store.select("df", '~(string="bar")') # invert ok for filters @@ -3397,12 +3456,13 @@ def test_invalid_filtering(self, setup_path): with ensure_clean_store(setup_path) as store: store.put("df", df, format="table") + msg = "unable to collapse Joint Filters" # not implemented - with pytest.raises(NotImplementedError): + with pytest.raises(NotImplementedError, match=msg): store.select("df", "columns=['A'] | columns=['B']") # in theory we could deal with this - with pytest.raises(NotImplementedError): + with pytest.raises(NotImplementedError, match=msg): store.select("df", "columns=['A','B'] & columns=['C']") def 
test_string_select(self, setup_path): @@ -3467,7 +3527,10 @@ def test_read_column(self, setup_path): ): store.select_column("df", "foo") - with pytest.raises(Exception): + msg = re.escape( + "select_column() got an unexpected keyword argument 'where'" + ) + with pytest.raises(TypeError, match=msg): store.select_column("df", "index", where=["index>5"]) # valid @@ -3476,7 +3539,11 @@ def test_read_column(self, setup_path): assert isinstance(result, Series) # not a data indexable column - with pytest.raises(ValueError): + msg = re.escape( + "column [values_block_0] can not be extracted individually; " + "it is not data indexable" + ) + with pytest.raises(ValueError, match=msg): store.select_column("df", "values_block_0") # a data column @@ -3587,16 +3654,17 @@ def test_coordinates(self, setup_path): tm.assert_frame_equal(result, expected) # invalid - with pytest.raises(ValueError): + msg = "cannot process expression" + with pytest.raises(ValueError, match=msg): store.select("df", where=np.arange(len(df), dtype="float64")) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.select("df", where=np.arange(len(df) + 1)) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.select("df", where=np.arange(len(df)), start=5) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.select("df", where=np.arange(len(df)), start=5, stop=10) # selection with filter @@ -3633,15 +3701,18 @@ def test_append_to_multiple(self, setup_path): with ensure_clean_store(setup_path) as store: # exceptions - with pytest.raises(ValueError): + msg = "append_to_multiple requires a selector that is in passed dict" + with pytest.raises(ValueError, match=msg): store.append_to_multiple( {"df1": ["A", "B"], "df2": None}, df, selector="df3" ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.append_to_multiple({"df1": None, "df2": None}, df, selector="df3") - with 
pytest.raises(ValueError): + msg = ("append_to_multiple must have a dictionary specified as the way to " + "split the value") + with pytest.raises(ValueError, match=msg): store.append_to_multiple("df1", df, "df1") # regular operation @@ -3687,7 +3758,9 @@ def test_append_to_multiple_dropna_false(self, setup_path): {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False ) - with pytest.raises(ValueError): + # TODO Update error message to desired message for this case + msg = "Cannot select as multiple after appending with dropna=False" + with pytest.raises(ValueError, match=msg): store.select_as_multiple(["df1a", "df2a"]) assert not store.select("df1a").index.equals(store.select("df2a").index) @@ -3727,18 +3800,19 @@ def test_select_as_multiple(self, setup_path): with ensure_clean_store(setup_path) as store: + msg = "keys must be a list/tuple" # no tables stored - with pytest.raises(Exception): + with pytest.raises(TypeError, match=msg): store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1") store.append("df1", df1, data_columns=["A", "B"]) store.append("df2", df2) # exceptions - with pytest.raises(Exception): + with pytest.raises(TypeError, match=msg): store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1") - with pytest.raises(Exception): + with pytest.raises(TypeError, match=msg): store.select_as_multiple([None], where=["A>0", "B>0"], selector="df1") msg = "'No object named df3 in the file'" @@ -3784,7 +3858,8 @@ def test_select_as_multiple(self, setup_path): # test exception for diff rows store.append("df3", tm.makeTimeDataFrame(nper=50)) - with pytest.raises(ValueError): + msg = "all tables must have exactly the same nrows!" 
+ with pytest.raises(ValueError, match=msg): store.select_as_multiple( ["df1", "df3"], where=["A>0", "B>0"], selector="df1" ) @@ -4019,11 +4094,11 @@ def test_multiple_open_close(self, setup_path): with ensure_clean_path(setup_path) as path: if pytables._table_file_open_policy_is_strict: - # multiples store1 = HDFStore(path) - - with pytest.raises(ValueError): + msg = (r"The file [\S]* is already opened\. Please close it before " + r"reopening in write mode\.") + with pytest.raises(ValueError, match=msg): HDFStore(path) store1.close() @@ -4086,42 +4161,44 @@ def test_multiple_open_close(self, setup_path): store = HDFStore(path) store.close() - with pytest.raises(ClosedFileError): + msg = r"[\S]* file is not open!" + with pytest.raises(ClosedFileError, match=msg): store.keys() - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): "df" in store - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): len(store) - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store["df"] - with pytest.raises(AttributeError): - store.df - - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.select("df") - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.get("df") - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.append("df2", df) - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.put("df3", df) - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.get_storer("df2") - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.remove("df2") - with pytest.raises(ClosedFileError, match="file is not open"): + with pytest.raises(ClosedFileError, match=msg): store.select("df") + msg = "'HDFStore' object has no attribute 'df'" + with 
pytest.raises(AttributeError, match=msg): + store.df + def test_pytables_native_read(self, datapath, setup_path): with ensure_clean_store( datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r" @@ -4338,7 +4415,8 @@ def test_append_with_diff_col_name_types_raises_value_error(self, setup_path): store.append(name, df) for d in (df2, df3, df4, df5): - with pytest.raises(ValueError): + msg = re.escape("cannot match existing table structure for [0] on appending data") + with pytest.raises(ValueError, match=msg): store.append(name, d) def test_query_with_nested_special_character(self, setup_path): @@ -4460,7 +4538,8 @@ def test_categorical(self, setup_path): df3 = df.copy() df3["s"] = df3["s"].cat.remove_unused_categories() - with pytest.raises(ValueError): + msg = "cannot append a categorical with different categories to the existing" + with pytest.raises(ValueError, match=msg): store.append("df3", df3) # Remove, and make sure meta data is removed (its a recursive @@ -4529,7 +4608,8 @@ def test_duplicate_column_name(self, setup_path): df = DataFrame(columns=["a", "a"], data=[[0, 0]]) with ensure_clean_path(setup_path) as path: - with pytest.raises(ValueError): + msg = "Columns index has to be unique for fixed format" + with pytest.raises(ValueError, match=msg): df.to_hdf(path, "df", format="fixed") df.to_hdf(path, "df", format="table") @@ -4655,24 +4735,30 @@ def test_read_hdf_errors(self, setup_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) with ensure_clean_path(setup_path) as path: - with pytest.raises(IOError): + msg = r"File [\S]* does not exist" + with pytest.raises(IOError, match=msg): read_hdf(path, "key") df.to_hdf(path, "df") store = HDFStore(path, mode="r") store.close() - with pytest.raises(IOError): + msg = "The HDFStore must be open for reading." 
+ with pytest.raises(IOError, match=msg): read_hdf(store, "df") def test_read_hdf_generic_buffer_errors(self): - with pytest.raises(NotImplementedError): + msg = "Support for generic buffers has not been implemented." + with pytest.raises(NotImplementedError, match=msg): read_hdf(BytesIO(b""), "df") def test_invalid_complib(self, setup_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) with tm.ensure_clean(setup_path) as path: - with pytest.raises(ValueError): + msg = re.escape("complib only supports ['zlib', 'lzo', 'bzip2', 'blosc', " + "'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', " + "'blosc:zlib', 'blosc:zstd'] compression.") + with pytest.raises(ValueError, match=msg): df.to_hdf(path, "df", complib="foolib") # GH10443 @@ -4688,7 +4774,8 @@ def test_read_nokey(self, setup_path): tm.assert_frame_equal(df, reread) df.to_hdf(path, "df2", mode="a") - with pytest.raises(ValueError): + msg = "key must be provided when HDF5 file contains multiple datasets." + with pytest.raises(ValueError, match=msg): read_hdf(path) def test_read_nokey_table(self, setup_path): @@ -4701,15 +4788,16 @@ def test_read_nokey_table(self, setup_path): tm.assert_frame_equal(df, reread) df.to_hdf(path, "df2", mode="a", format="table") - with pytest.raises(ValueError): + msg = "key must be provided when HDF5 file contains multiple datasets." 
+ with pytest.raises(ValueError, match=msg): read_hdf(path) def test_read_nokey_empty(self, setup_path): with ensure_clean_path(setup_path) as path: store = HDFStore(path) store.close() - - with pytest.raises(ValueError): + msg = re.escape("Dataset(s) incompatible with Pandas data types, not table, or no datasets found in HDF5 file.") + with pytest.raises(ValueError, match=msg): read_hdf(path) def test_read_from_pathlib_path(self, setup_path): @@ -4788,14 +4876,16 @@ def test_query_compare_column_type(self, setup_path): # non strings to string column always fail for v in [2.1, True, Timestamp("2014-01-01"), pd.Timedelta(1, "s")]: query = f"date {op} v" - with pytest.raises(TypeError): + msg = f"Cannot compare {v} of type {type(v)} to string column" + with pytest.raises(TypeError, match=msg): store.select("test", where=query) # strings to other columns must be convertible to type v = "a" for col in ["int", "float", "real_date"]: query = f"{col} {op} v" - with pytest.raises(ValueError): + msg = "could not convert string to " + with pytest.raises(ValueError, match=msg): store.select("test", where=query) for v, col in zip( From d52c073882f4581748c6e3e1d8fa028dc7deb21b Mon Sep 17 00:00:00 2001 From: moink Date: Mon, 21 Dec 2020 13:22:44 +0100 Subject: [PATCH 2/4] TEST: GH30999 Add match=msg to all "with pytest.raises" in pandas/tests/io/pytables/test_store.py --- pandas/tests/io/pytables/test_store.py | 302 ++++++++++++++++++------- 1 file changed, 216 insertions(+), 86 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index b35414724d946..b2ccd691453bb 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -483,11 +483,12 @@ def test_mode(self, setup_path): def check(mode): + msg = r"[\S]* does not exist" with ensure_clean_path(setup_path) as path: # constructor if mode in ["r", "r+"]: - with pytest.raises(IOError): + with pytest.raises(IOError, match=msg): 
HDFStore(path, mode=mode) else: @@ -499,7 +500,7 @@ def check(mode): # context if mode in ["r", "r+"]: - with pytest.raises(IOError): + with pytest.raises(IOError, match=msg): with HDFStore(path, mode=mode) as store: pass else: @@ -510,7 +511,7 @@ def check(mode): # conv write if mode in ["r", "r+"]: - with pytest.raises(IOError): + with pytest.raises(IOError, match=msg): df.to_hdf(path, "df", mode=mode) df.to_hdf(path, "df", mode="w") else: @@ -549,8 +550,12 @@ def test_reopen_handle(self, setup_path): store = HDFStore(path, mode="a") store["a"] = tm.makeTimeSeries() + msg = ( + r"Re-opening the file \[[\S]*\] with mode \[a\] will delete the " + "current file!" + ) # invalid mode change - with pytest.raises(PossibleDataLossError): + with pytest.raises(PossibleDataLossError, match=msg): store.open("w") store.close() @@ -713,7 +718,8 @@ def test_getattr(self, setup_path): # errors for x in ["d", "mode", "path", "handle", "complib"]: - with pytest.raises(AttributeError): + msg = f"'HDFStore' object has no attribute '{x}'" + with pytest.raises(AttributeError, match=msg): getattr(store, x) # not stores @@ -734,17 +740,18 @@ def test_put(self, setup_path): store.put("c", df[:10], format="table") # not OK, not a table - with pytest.raises(ValueError): + msg = "Can only append to Tables" + with pytest.raises(ValueError, match=msg): store.put("b", df[10:], append=True) # node does not currently exist, test _is_table_type returns False # in this case _maybe_remove(store, "f") - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.put("f", df[10:], append=True) # can't put to a table (use append instead) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.put("c", df[10:], append=True) # overwrite table @@ -787,7 +794,8 @@ def test_put_compression(self, setup_path): tm.assert_frame_equal(store["c"], df) # can't compress if format='fixed' - with pytest.raises(ValueError): + msg = "Compression not supported on 
Fixed format stores" + with pytest.raises(ValueError, match=msg): store.put("b", df, format="fixed", complib="zlib") @td.skip_if_windows_python_3 @@ -797,7 +805,8 @@ def test_put_compression_blosc(self, setup_path): with ensure_clean_store(setup_path) as store: # can't compress if format='fixed' - with pytest.raises(ValueError): + msg = "Compression not supported on Fixed format stores" + with pytest.raises(ValueError, match=msg): store.put("b", df, format="fixed", complib="blosc") store.put("c", df, format="table", complib="blosc") @@ -1329,7 +1338,11 @@ def test_append_frame_column_oriented(self, setup_path): tm.assert_frame_equal(expected, result) # this isn't supported - with pytest.raises(TypeError): + msg = re.escape( + "passing a filterable condition to a non-table indexer " + "[Filter: Not Initialized]" + ) + with pytest.raises(TypeError, match=msg): store.select("df1", "columns=A and index>df.index[4]") def test_append_with_different_block_ordering(self, setup_path): @@ -1367,12 +1380,18 @@ def test_append_with_different_block_ordering(self, setup_path): # store additional fields in different blocks df["int16_2"] = Series([1] * len(df), dtype="int16") - with pytest.raises(ValueError): + msg = re.escape( + "cannot match existing table structure for [int16] on " "appending data" + ) + with pytest.raises(ValueError, match=msg): store.append("df", df) # store multiple additional fields in different blocks df["float_3"] = Series([1.0] * len(df), dtype="float64") - with pytest.raises(ValueError): + msg = re.escape( + "cannot match existing table structure for [A,B] on " "appending data" + ) + with pytest.raises(ValueError, match=msg): store.append("df", df) def test_append_with_strings(self, setup_path): @@ -1410,7 +1429,14 @@ def check_col(key, name, size): df_new = DataFrame( [[124, "abcdefqhij"], [346, "abcdefghijklmnopqrtsuvwxyz"]] ) - with pytest.raises(ValueError): + msg = ( + r"Trying to store a string with len \[26\] in " + r"\[values_block_1\] column 
but\n" + r"this column has a limit of \[15\]!\n" + "Consider using min_itemsize to preset the sizes on these " + "columns" + ) + with pytest.raises(ValueError, match=msg): store.append("df_new", df_new) # min_itemsize on Series index (GH 11412) @@ -1488,7 +1514,10 @@ def check_col(key, name, size): # invalid min_itemsize keys df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"]) _maybe_remove(store, "df") - with pytest.raises(ValueError): + msg = re.escape( + "min_itemsize has the key [foo] which is not an axis or " "data_column" + ) + with pytest.raises(ValueError, match=msg): store.append("df", df, min_itemsize={"foo": 20, "foobar": 20}) def test_append_with_empty_string(self, setup_path): @@ -1718,7 +1747,8 @@ def col(t, column): # try to index a non-table _maybe_remove(store, "f2") store.put("f2", df) - with pytest.raises(TypeError): + msg = "cannot create table index on a Fixed format store" + with pytest.raises(TypeError, match=msg): store.create_table_index("f2") def test_create_table_index_data_columns_argument(self, setup_path): @@ -1800,9 +1830,15 @@ def test_column_multiindex(self, setup_path): store["df1"], expected, check_index_type=True, check_column_type=True ) - with pytest.raises(ValueError): + msg = re.escape( + "cannot use a multi-index on axis [1] with data_columns " "['A']" + ) + with pytest.raises(ValueError, match=msg): store.put("df2", df, format="table", data_columns=["A"]) - with pytest.raises(ValueError): + msg = re.escape( + "cannot use a multi-index on axis [1] with data_columns" " True" + ) + with pytest.raises(ValueError, match=msg): store.put("df3", df, format="table", data_columns=True) # appending multi-column on existing table (see GH 6167) @@ -1874,7 +1910,8 @@ def make_index(names=None): columns=["a", "b"], index=make_index(["date", "a", "t"]), ) - with pytest.raises(ValueError): + msg = "duplicate names/columns in the multi-index when storing as a table" + with pytest.raises(ValueError, match=msg): 
store.append("df", df) # dup within level @@ -1884,7 +1921,7 @@ def make_index(names=None): columns=["a", "b"], index=make_index(["date", "date", "date"]), ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.append("df", df) # fully names @@ -1945,9 +1982,17 @@ def test_pass_spec_to_storer(self, setup_path): with ensure_clean_store(setup_path) as store: store.put("df", df) - with pytest.raises(TypeError): + msg = ( + "cannot pass a column specification when reading a Fixed format " + "store. this store must be selected in its entirety" + ) + with pytest.raises(TypeError, match=msg): store.select("df", columns=["A"]) - with pytest.raises(TypeError): + msg = ( + "cannot pass a where specification when reading from a Fixed " + "format store. this store must be selected in its entirety" + ) + with pytest.raises(TypeError, match=msg): store.select("df", where=[("columns=A")]) def test_append_misc(self, setup_path): @@ -2010,13 +2055,14 @@ def test_append_raise(self, setup_path): df = tm.makeDataFrame() df["invalid"] = [["a"]] * len(df) assert df.dtypes["invalid"] == np.object_ - with pytest.raises(TypeError): + msg = re.escape("object of type 'int' has no len()") + with pytest.raises(TypeError, match=msg): store.append("df", df) # multiple invalid columns df["invalid2"] = [["a"]] * len(df) df["invalid3"] = [["a"]] * len(df) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): store.append("df", df) # datetime with embedded nans as object @@ -2026,15 +2072,21 @@ def test_append_raise(self, setup_path): s[0:5] = np.nan df["invalid"] = s assert df.dtypes["invalid"] == np.object_ - with pytest.raises(TypeError): + msg = "too many timezones in this block, create separate data columns" + with pytest.raises(TypeError, match=msg): store.append("df", df) # directly ndarray - with pytest.raises(TypeError): + msg = "value must be None, Series, or DataFrame" + with pytest.raises(TypeError, match=msg): store.append("df", 
np.arange(10)) # series directly - with pytest.raises(TypeError): + msg = re.escape( + "cannot properly create the storer for: " + "[group->df,value->]" + ) + with pytest.raises(TypeError, match=msg): store.append("df", Series(np.arange(10))) # appending an incompatible table @@ -2042,7 +2094,12 @@ def test_append_raise(self, setup_path): store.append("df", df) df["foo"] = "foo" - with pytest.raises(ValueError): + msg = re.escape( + "invalid combination of [non_index_axes] on appending data " + "[(1, ['A', 'B', 'C', 'D', 'foo'])] vs current table " + "[(1, ['A', 'B', 'C', 'D'])]" + ) + with pytest.raises(ValueError, match=msg): store.append("df", df) def test_table_index_incompatible_dtypes(self, setup_path): @@ -2051,7 +2108,8 @@ def test_table_index_incompatible_dtypes(self, setup_path): with ensure_clean_store(setup_path) as store: store.put("frame", df1, format="table") - with pytest.raises(TypeError): + msg = re.escape("incompatible kind in col [integer - datetime64]") + with pytest.raises(TypeError, match=msg): store.put("frame", df2, format="table", append=True) def test_table_values_dtypes_roundtrip(self, setup_path): @@ -2066,7 +2124,15 @@ def test_table_values_dtypes_roundtrip(self, setup_path): tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes) # incompatible dtype - with pytest.raises(ValueError): + msg = re.escape( + "invalid combination of [values_axes] on appending data " + "[name->values_block_0,cname->values_block_0," + "dtype->float64,kind->float,shape->(1, 3)] vs " + "current table [name->values_block_0," + "cname->values_block_0,dtype->int64,kind->integer," + "shape->None]" + ) + with pytest.raises(ValueError, match=msg): store.append("df_i8", df1) # check creation/storage/retrieval of float32 (a bit hacky to @@ -2142,7 +2208,8 @@ def test_unimplemented_dtypes_table_columns(self, setup_path): for n, f in dtypes: df = tm.makeDataFrame() df[n] = f - with pytest.raises(TypeError): + msg = re.escape(f"[{n}] is not implemented as a table 
column") + with pytest.raises(TypeError, match=msg): store.append(f"df1_{n}", df) # frame @@ -2154,7 +2221,8 @@ def test_unimplemented_dtypes_table_columns(self, setup_path): with ensure_clean_store(setup_path) as store: # this fails because we have a date in the object block...... - with pytest.raises(TypeError): + msg = "object of type 'int' has no len()" + with pytest.raises(TypeError, match=msg): store.append("df_unimplemented", df) def test_calendar_roundtrip_issue(self, setup_path): @@ -2287,14 +2355,22 @@ def test_invalid_terms(self, setup_path): store.put("df", df, format="table") # some invalid terms - with pytest.raises(TypeError): + msg = re.escape( + "__init__() missing 1 required positional argument: " "'where'" + ) + with pytest.raises(TypeError, match=msg): Term() # more invalid - with pytest.raises(ValueError): + msg = re.escape( + "cannot process expression [df.index[3]], " + "[2000-01-06 00:00:00] is not a valid condition" + ) + with pytest.raises(ValueError, match=msg): store.select("df", "df.index[3]") - with pytest.raises(SyntaxError): + msg = "invalid syntax" + with pytest.raises(SyntaxError, match=msg): store.select("df", "index>") # from the docs @@ -2321,7 +2397,14 @@ def test_invalid_terms(self, setup_path): ) dfq.to_hdf(path, "dfq", format="table") - with pytest.raises(ValueError): + msg = ( + r"The passed where expression: A>0 or C>0\n\s*" + r"contains an invalid variable reference\n\s*" + r"all of the variable references must be a reference to\n\s*" + r"an axis \(e.g. 
'index' or 'columns'\), or a data_column\n\s*" + r"The currently defined references are: index,columns\n" + ) + with pytest.raises(ValueError, match=msg): read_hdf(path, "dfq", where="A>0 or C>0") def test_same_name_scoping(self, setup_path): @@ -2917,10 +3000,11 @@ def test_select_iterator(self, setup_path): df = tm.makeTimeDataFrame(500) df.to_hdf(path, "df_non_table") - with pytest.raises(TypeError): + msg = "can only use an iterator or chunksize on a table" + with pytest.raises(TypeError, match=msg): read_hdf(path, "df_non_table", chunksize=100) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): read_hdf(path, "df_non_table", iterator=True) with ensure_clean_path(setup_path) as path: @@ -3264,7 +3348,8 @@ def test_frame_select(self, setup_path): # invalid terms df = tm.makeTimeDataFrame() store.append("df_time", df) - with pytest.raises(ValueError): + msg = "could not convert string to Timestamp" + with pytest.raises(ValueError, match=msg): store.select("df_time", "index>0") # can't select if not written as table @@ -3311,7 +3396,8 @@ def test_frame_select_complex(self, setup_path): tm.assert_frame_equal(result, expected) # invert not implemented in numexpr :( - with pytest.raises(NotImplementedError): + msg = "cannot use an invert condition when passing to numexpr" + with pytest.raises(NotImplementedError, match=msg): store.select("df", '~(string="bar")') # invert ok for filters @@ -3397,12 +3483,13 @@ def test_invalid_filtering(self, setup_path): with ensure_clean_store(setup_path) as store: store.put("df", df, format="table") + msg = "unable to collapse Joint Filters" # not implemented - with pytest.raises(NotImplementedError): + with pytest.raises(NotImplementedError, match=msg): store.select("df", "columns=['A'] | columns=['B']") # in theory we could deal with this - with pytest.raises(NotImplementedError): + with pytest.raises(NotImplementedError, match=msg): store.select("df", "columns=['A','B'] & columns=['C']") def 
test_string_select(self, setup_path): @@ -3467,7 +3554,10 @@ def test_read_column(self, setup_path): ): store.select_column("df", "foo") - with pytest.raises(Exception): + msg = re.escape( + "select_column() got an unexpected keyword argument 'where'" + ) + with pytest.raises(TypeError, match=msg): store.select_column("df", "index", where=["index>5"]) # valid @@ -3476,7 +3566,11 @@ def test_read_column(self, setup_path): assert isinstance(result, Series) # not a data indexable column - with pytest.raises(ValueError): + msg = re.escape( + "column [values_block_0] can not be extracted individually; " + "it is not data indexable" + ) + with pytest.raises(ValueError, match=msg): store.select_column("df", "values_block_0") # a data column @@ -3587,16 +3681,17 @@ def test_coordinates(self, setup_path): tm.assert_frame_equal(result, expected) # invalid - with pytest.raises(ValueError): + msg = "cannot process expression" + with pytest.raises(ValueError, match=msg): store.select("df", where=np.arange(len(df), dtype="float64")) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.select("df", where=np.arange(len(df) + 1)) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.select("df", where=np.arange(len(df)), start=5) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.select("df", where=np.arange(len(df)), start=5, stop=10) # selection with filter @@ -3633,15 +3728,20 @@ def test_append_to_multiple(self, setup_path): with ensure_clean_store(setup_path) as store: # exceptions - with pytest.raises(ValueError): + msg = "append_to_multiple requires a selector that is in passed dict" + with pytest.raises(ValueError, match=msg): store.append_to_multiple( {"df1": ["A", "B"], "df2": None}, df, selector="df3" ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): store.append_to_multiple({"df1": None, "df2": None}, df, selector="df3") - with 
pytest.raises(ValueError): + msg = ( + "append_to_multiple must have a dictionary specified as the way to " + "split the value" + ) + with pytest.raises(ValueError, match=msg): store.append_to_multiple("df1", df, "df1") # regular operation @@ -3687,7 +3787,9 @@ def test_append_to_multiple_dropna_false(self, setup_path): {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False ) - with pytest.raises(ValueError): + # TODO Update error message to desired message for this case + msg = "Cannot select as multiple after appending with dropna=False" + with pytest.raises(ValueError, match=msg): store.select_as_multiple(["df1a", "df2a"]) assert not store.select("df1a").index.equals(store.select("df2a").index) @@ -3727,18 +3829,19 @@ def test_select_as_multiple(self, setup_path): with ensure_clean_store(setup_path) as store: + msg = "keys must be a list/tuple" # no tables stored - with pytest.raises(Exception): + with pytest.raises(TypeError, match=msg): store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1") store.append("df1", df1, data_columns=["A", "B"]) store.append("df2", df2) # exceptions - with pytest.raises(Exception): + with pytest.raises(TypeError, match=msg): store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1") - with pytest.raises(Exception): + with pytest.raises(TypeError, match=msg): store.select_as_multiple([None], where=["A>0", "B>0"], selector="df1") msg = "'No object named df3 in the file'" @@ -3784,7 +3887,8 @@ def test_select_as_multiple(self, setup_path): # test exception for diff rows store.append("df3", tm.makeTimeDataFrame(nper=50)) - with pytest.raises(ValueError): + msg = "all tables must have exactly the same nrows!" 
+ with pytest.raises(ValueError, match=msg): store.select_as_multiple( ["df1", "df3"], where=["A>0", "B>0"], selector="df1" ) @@ -4019,11 +4123,13 @@ def test_multiple_open_close(self, setup_path): with ensure_clean_path(setup_path) as path: if pytables._table_file_open_policy_is_strict: - # multiples store1 = HDFStore(path) - - with pytest.raises(ValueError): + msg = ( + r"The file [\S]* is already opened\. Please close it before " + r"reopening in write mode\." + ) + with pytest.raises(ValueError, match=msg): HDFStore(path) store1.close() @@ -4086,42 +4192,44 @@ def test_multiple_open_close(self, setup_path): store = HDFStore(path) store.close() - with pytest.raises(ClosedFileError): + msg = r"[\S]* file is not open!" + with pytest.raises(ClosedFileError, match=msg): store.keys() - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): "df" in store - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): len(store) - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store["df"] - with pytest.raises(AttributeError): - store.df - - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.select("df") - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.get("df") - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.append("df2", df) - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.put("df3", df) - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.get_storer("df2") - with pytest.raises(ClosedFileError): + with pytest.raises(ClosedFileError, match=msg): store.remove("df2") - with pytest.raises(ClosedFileError, match="file is not open"): + with pytest.raises(ClosedFileError, match=msg): store.select("df") + msg = "'HDFStore' object has no attribute 'df'" + with 
pytest.raises(AttributeError, match=msg): + store.df + def test_pytables_native_read(self, datapath, setup_path): with ensure_clean_store( datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r" @@ -4338,7 +4446,10 @@ def test_append_with_diff_col_name_types_raises_value_error(self, setup_path): store.append(name, df) for d in (df2, df3, df4, df5): - with pytest.raises(ValueError): + msg = re.escape( + "cannot match existing table structure for [0] on appending data" + ) + with pytest.raises(ValueError, match=msg): store.append(name, d) def test_query_with_nested_special_character(self, setup_path): @@ -4460,7 +4571,10 @@ def test_categorical(self, setup_path): df3 = df.copy() df3["s"] = df3["s"].cat.remove_unused_categories() - with pytest.raises(ValueError): + msg = ( + "cannot append a categorical with different categories to the existing" + ) + with pytest.raises(ValueError, match=msg): store.append("df3", df3) # Remove, and make sure meta data is removed (its a recursive @@ -4529,7 +4643,8 @@ def test_duplicate_column_name(self, setup_path): df = DataFrame(columns=["a", "a"], data=[[0, 0]]) with ensure_clean_path(setup_path) as path: - with pytest.raises(ValueError): + msg = "Columns index has to be unique for fixed format" + with pytest.raises(ValueError, match=msg): df.to_hdf(path, "df", format="fixed") df.to_hdf(path, "df", format="table") @@ -4655,24 +4770,32 @@ def test_read_hdf_errors(self, setup_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) with ensure_clean_path(setup_path) as path: - with pytest.raises(IOError): + msg = r"File [\S]* does not exist" + with pytest.raises(IOError, match=msg): read_hdf(path, "key") df.to_hdf(path, "df") store = HDFStore(path, mode="r") store.close() - with pytest.raises(IOError): + msg = "The HDFStore must be open for reading." 
+ with pytest.raises(IOError, match=msg): read_hdf(store, "df") def test_read_hdf_generic_buffer_errors(self): - with pytest.raises(NotImplementedError): + msg = "Support for generic buffers has not been implemented." + with pytest.raises(NotImplementedError, match=msg): read_hdf(BytesIO(b""), "df") def test_invalid_complib(self, setup_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) with tm.ensure_clean(setup_path) as path: - with pytest.raises(ValueError): + msg = re.escape( + "complib only supports ['zlib', 'lzo', 'bzip2', 'blosc', " + "'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', " + "'blosc:zlib', 'blosc:zstd'] compression." + ) + with pytest.raises(ValueError, match=msg): df.to_hdf(path, "df", complib="foolib") # GH10443 @@ -4688,7 +4811,8 @@ def test_read_nokey(self, setup_path): tm.assert_frame_equal(df, reread) df.to_hdf(path, "df2", mode="a") - with pytest.raises(ValueError): + msg = "key must be provided when HDF5 file contains multiple datasets." + with pytest.raises(ValueError, match=msg): read_hdf(path) def test_read_nokey_table(self, setup_path): @@ -4701,15 +4825,19 @@ def test_read_nokey_table(self, setup_path): tm.assert_frame_equal(df, reread) df.to_hdf(path, "df2", mode="a", format="table") - with pytest.raises(ValueError): + msg = "key must be provided when HDF5 file contains multiple datasets." + with pytest.raises(ValueError, match=msg): read_hdf(path) def test_read_nokey_empty(self, setup_path): with ensure_clean_path(setup_path) as path: store = HDFStore(path) store.close() - - with pytest.raises(ValueError): + msg = re.escape( + "Dataset(s) incompatible with Pandas data types, not table, or no " + "datasets found in HDF5 file." 
+ ) + with pytest.raises(ValueError, match=msg): read_hdf(path) def test_read_from_pathlib_path(self, setup_path): @@ -4788,14 +4916,16 @@ def test_query_compare_column_type(self, setup_path): # non strings to string column always fail for v in [2.1, True, Timestamp("2014-01-01"), pd.Timedelta(1, "s")]: query = f"date {op} v" - with pytest.raises(TypeError): + msg = f"Cannot compare {v} of type {type(v)} to string column" + with pytest.raises(TypeError, match=msg): store.select("test", where=query) # strings to other columns must be convertible to type v = "a" for col in ["int", "float", "real_date"]: query = f"{col} {op} v" - with pytest.raises(ValueError): + msg = "could not convert string to " + with pytest.raises(ValueError, match=msg): store.select("test", where=query) for v, col in zip( From 920589f50f1f353e476c3cff592003db5c53e35d Mon Sep 17 00:00:00 2001 From: moink Date: Mon, 21 Dec 2020 14:23:32 +0100 Subject: [PATCH 3/4] Fix strings split in two and reformatted by black --- pandas/tests/io/pytables/test_store.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index b2ccd691453bb..02969b5e525ec 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1381,7 +1381,7 @@ def test_append_with_different_block_ordering(self, setup_path): # store additional fields in different blocks df["int16_2"] = Series([1] * len(df), dtype="int16") msg = re.escape( - "cannot match existing table structure for [int16] on " "appending data" + "cannot match existing table structure for [int16] on appending data" ) with pytest.raises(ValueError, match=msg): store.append("df", df) @@ -1389,7 +1389,7 @@ def test_append_with_different_block_ordering(self, setup_path): # store multiple additional fields in different blocks df["float_3"] = Series([1.0] * len(df), dtype="float64") msg = re.escape( - "cannot match existing table 
structure for [A,B] on " "appending data" + "cannot match existing table structure for [A,B] on appending data" ) with pytest.raises(ValueError, match=msg): store.append("df", df) @@ -1515,7 +1515,7 @@ def check_col(key, name, size): df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"]) _maybe_remove(store, "df") msg = re.escape( - "min_itemsize has the key [foo] which is not an axis or " "data_column" + "min_itemsize has the key [foo] which is not an axis or data_column" ) with pytest.raises(ValueError, match=msg): store.append("df", df, min_itemsize={"foo": 20, "foobar": 20}) @@ -1831,12 +1831,12 @@ def test_column_multiindex(self, setup_path): ) msg = re.escape( - "cannot use a multi-index on axis [1] with data_columns " "['A']" + "cannot use a multi-index on axis [1] with data_columns ['A']" ) with pytest.raises(ValueError, match=msg): store.put("df2", df, format="table", data_columns=["A"]) msg = re.escape( - "cannot use a multi-index on axis [1] with data_columns" " True" + "cannot use a multi-index on axis [1] with data_columns True" ) with pytest.raises(ValueError, match=msg): store.put("df3", df, format="table", data_columns=True) @@ -2356,7 +2356,7 @@ def test_invalid_terms(self, setup_path): # some invalid terms msg = re.escape( - "__init__() missing 1 required positional argument: " "'where'" + "__init__() missing 1 required positional argument: 'where'" ) with pytest.raises(TypeError, match=msg): Term() From 4e41e6f641e5dfcf7d31588f30d2fde1ad1f3724 Mon Sep 17 00:00:00 2001 From: moink Date: Mon, 21 Dec 2020 15:08:10 +0100 Subject: [PATCH 4/4] TST Fix match argument so it should work on all platforms - was failing on windows --- pandas/tests/io/pytables/test_store.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 02969b5e525ec..274efda55414c 100644 --- a/pandas/tests/io/pytables/test_store.py +++ 
b/pandas/tests/io/pytables/test_store.py @@ -4790,11 +4790,7 @@ def test_read_hdf_generic_buffer_errors(self): def test_invalid_complib(self, setup_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) with tm.ensure_clean(setup_path) as path: - msg = re.escape( - "complib only supports ['zlib', 'lzo', 'bzip2', 'blosc', " - "'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', " - "'blosc:zlib', 'blosc:zstd'] compression." - ) + msg = r"complib only supports \[.*\] compression." with pytest.raises(ValueError, match=msg): df.to_hdf(path, "df", complib="foolib")