From d81fef8787d764eac267492248d81069bc75086e Mon Sep 17 00:00:00 2001
From: moink <theresa.robinson@gmail.com>
Date: Mon, 21 Dec 2020 13:22:44 +0100
Subject: [PATCH 1/4] TEST: GH30999 Add match=msg to all "with pytest.raises"
 in pandas/tests/io/pytables/test_store.py

---
 pandas/tests/io/pytables/test_store.py | 262 +++++++++++++++++--------
 1 file changed, 176 insertions(+), 86 deletions(-)

diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index b35414724d946..2f85b2517b0af 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -483,11 +483,12 @@ def test_mode(self, setup_path):
 
         def check(mode):
 
+            msg = r"[\S]* does not exist"
             with ensure_clean_path(setup_path) as path:
 
                 # constructor
                 if mode in ["r", "r+"]:
-                    with pytest.raises(IOError):
+                    with pytest.raises(IOError, match=msg):
                         HDFStore(path, mode=mode)
 
                 else:
@@ -499,7 +500,7 @@ def check(mode):
 
                 # context
                 if mode in ["r", "r+"]:
-                    with pytest.raises(IOError):
+                    with pytest.raises(IOError, match=msg):
                         with HDFStore(path, mode=mode) as store:
                             pass
                 else:
@@ -510,7 +511,7 @@ def check(mode):
 
                 # conv write
                 if mode in ["r", "r+"]:
-                    with pytest.raises(IOError):
+                    with pytest.raises(IOError, match=msg):
                         df.to_hdf(path, "df", mode=mode)
                     df.to_hdf(path, "df", mode="w")
                 else:
@@ -549,8 +550,9 @@ def test_reopen_handle(self, setup_path):
             store = HDFStore(path, mode="a")
             store["a"] = tm.makeTimeSeries()
 
+            msg = r"Re-opening the file \[[\S]*\] with mode \[a\] will delete the current file!"
             # invalid mode change
-            with pytest.raises(PossibleDataLossError):
+            with pytest.raises(PossibleDataLossError, match=msg):
                 store.open("w")
 
             store.close()
@@ -713,7 +715,8 @@ def test_getattr(self, setup_path):
 
             # errors
             for x in ["d", "mode", "path", "handle", "complib"]:
-                with pytest.raises(AttributeError):
+                msg = f"'HDFStore' object has no attribute '{x}'"
+                with pytest.raises(AttributeError, match=msg):
                     getattr(store, x)
 
             # not stores
@@ -734,17 +737,18 @@ def test_put(self, setup_path):
             store.put("c", df[:10], format="table")
 
             # not OK, not a table
-            with pytest.raises(ValueError):
+            msg = "Can only append to Tables"
+            with pytest.raises(ValueError, match=msg):
                 store.put("b", df[10:], append=True)
 
             # node does not currently exist, test _is_table_type returns False
             # in this case
             _maybe_remove(store, "f")
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.put("f", df[10:], append=True)
 
             # can't put to a table (use append instead)
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.put("c", df[10:], append=True)
 
             # overwrite table
@@ -787,7 +791,8 @@ def test_put_compression(self, setup_path):
             tm.assert_frame_equal(store["c"], df)
 
             # can't compress if format='fixed'
-            with pytest.raises(ValueError):
+            msg = "Compression not supported on Fixed format stores"
+            with pytest.raises(ValueError, match=msg):
                 store.put("b", df, format="fixed", complib="zlib")
 
     @td.skip_if_windows_python_3
@@ -797,7 +802,8 @@ def test_put_compression_blosc(self, setup_path):
         with ensure_clean_store(setup_path) as store:
 
             # can't compress if format='fixed'
-            with pytest.raises(ValueError):
+            msg = "Compression not supported on Fixed format stores"
+            with pytest.raises(ValueError, match=msg):
                 store.put("b", df, format="fixed", complib="blosc")
 
             store.put("c", df, format="table", complib="blosc")
@@ -1329,7 +1335,9 @@ def test_append_frame_column_oriented(self, setup_path):
             tm.assert_frame_equal(expected, result)
 
             # this isn't supported
-            with pytest.raises(TypeError):
+            msg = re.escape("passing a filterable condition to a non-table indexer "
+                            "[Filter: Not Initialized]")
+            with pytest.raises(TypeError, match=msg):
                 store.select("df1", "columns=A and index>df.index[4]")
 
     def test_append_with_different_block_ordering(self, setup_path):
@@ -1367,12 +1375,16 @@ def test_append_with_different_block_ordering(self, setup_path):
 
             # store additional fields in different blocks
             df["int16_2"] = Series([1] * len(df), dtype="int16")
-            with pytest.raises(ValueError):
+            msg = re.escape("cannot match existing table structure for [int16] on "
+                            "appending data")
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
 
             # store multiple additional fields in different blocks
             df["float_3"] = Series([1.0] * len(df), dtype="float64")
-            with pytest.raises(ValueError):
+            msg = re.escape("cannot match existing table structure for [A,B] on "
+                            "appending data")
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
 
     def test_append_with_strings(self, setup_path):
@@ -1410,7 +1422,12 @@ def check_col(key, name, size):
                 df_new = DataFrame(
                     [[124, "abcdefqhij"], [346, "abcdefghijklmnopqrtsuvwxyz"]]
                 )
-                with pytest.raises(ValueError):
+                msg = (r"Trying to store a string with len \[26\] in "
+                       r"\[values_block_1\] column but\n"
+                       r"this column has a limit of \[15\]!\n"
+                       "Consider using min_itemsize to preset the sizes on these "
+                       "columns")
+                with pytest.raises(ValueError, match=msg):
                     store.append("df_new", df_new)
 
                 # min_itemsize on Series index (GH 11412)
@@ -1488,7 +1505,9 @@ def check_col(key, name, size):
             # invalid min_itemsize keys
             df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"])
             _maybe_remove(store, "df")
-            with pytest.raises(ValueError):
+            msg = re.escape("min_itemsize has the key [foo] which is not an axis or "
+                            "data_column")
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df, min_itemsize={"foo": 20, "foobar": 20})
 
     def test_append_with_empty_string(self, setup_path):
@@ -1718,7 +1737,8 @@ def col(t, column):
                 # try to index a non-table
                 _maybe_remove(store, "f2")
                 store.put("f2", df)
-                with pytest.raises(TypeError):
+                msg = "cannot create table index on a Fixed format store"
+                with pytest.raises(TypeError, match=msg):
                     store.create_table_index("f2")
 
     def test_create_table_index_data_columns_argument(self, setup_path):
@@ -1800,9 +1820,13 @@ def test_column_multiindex(self, setup_path):
                 store["df1"], expected, check_index_type=True, check_column_type=True
             )
 
-            with pytest.raises(ValueError):
+            msg = re.escape("cannot use a multi-index on axis [1] with data_columns "
+                            "['A']")
+            with pytest.raises(ValueError, match=msg):
                 store.put("df2", df, format="table", data_columns=["A"])
-            with pytest.raises(ValueError):
+            msg = re.escape("cannot use a multi-index on axis [1] with data_columns"
+                            " True")
+            with pytest.raises(ValueError, match=msg):
                 store.put("df3", df, format="table", data_columns=True)
 
         # appending multi-column on existing table (see GH 6167)
@@ -1874,7 +1898,8 @@ def make_index(names=None):
                 columns=["a", "b"],
                 index=make_index(["date", "a", "t"]),
             )
-            with pytest.raises(ValueError):
+            msg = "duplicate names/columns in the multi-index when storing as a table"
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
 
             # dup within level
@@ -1884,7 +1909,7 @@ def make_index(names=None):
                 columns=["a", "b"],
                 index=make_index(["date", "date", "date"]),
             )
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
 
             # fully names
@@ -1945,9 +1970,13 @@ def test_pass_spec_to_storer(self, setup_path):
 
         with ensure_clean_store(setup_path) as store:
             store.put("df", df)
-            with pytest.raises(TypeError):
+            msg = ("cannot pass a column specification when reading a Fixed format "
+                   "store. this store must be selected in its entirety")
+            with pytest.raises(TypeError, match=msg):
                 store.select("df", columns=["A"])
-            with pytest.raises(TypeError):
+            msg = ("cannot pass a where specification when reading from a Fixed "
+                   "format store. this store must be selected in its entirety")
+            with pytest.raises(TypeError, match=msg):
                 store.select("df", where=[("columns=A")])
 
     def test_append_misc(self, setup_path):
@@ -2010,13 +2039,14 @@ def test_append_raise(self, setup_path):
             df = tm.makeDataFrame()
             df["invalid"] = [["a"]] * len(df)
             assert df.dtypes["invalid"] == np.object_
-            with pytest.raises(TypeError):
+            msg = re.escape("object of type 'int' has no len()")
+            with pytest.raises(TypeError, match=msg):
                 store.append("df", df)
 
             # multiple invalid columns
             df["invalid2"] = [["a"]] * len(df)
             df["invalid3"] = [["a"]] * len(df)
-            with pytest.raises(TypeError):
+            with pytest.raises(TypeError, match=msg):
                 store.append("df", df)
 
             # datetime with embedded nans as object
@@ -2026,15 +2056,19 @@ def test_append_raise(self, setup_path):
             s[0:5] = np.nan
             df["invalid"] = s
             assert df.dtypes["invalid"] == np.object_
-            with pytest.raises(TypeError):
+            msg = "too many timezones in this block, create separate data columns"
+            with pytest.raises(TypeError, match=msg):
                 store.append("df", df)
 
             # directly ndarray
-            with pytest.raises(TypeError):
+            msg = "value must be None, Series, or DataFrame"
+            with pytest.raises(TypeError, match=msg):
                 store.append("df", np.arange(10))
 
             # series directly
-            with pytest.raises(TypeError):
+            msg = re.escape("cannot properly create the storer for: "
+                            "[group->df,value-><class 'pandas.core.series.Series'>]")
+            with pytest.raises(TypeError, match=msg):
                 store.append("df", Series(np.arange(10)))
 
             # appending an incompatible table
@@ -2042,7 +2076,10 @@ def test_append_raise(self, setup_path):
             store.append("df", df)
 
             df["foo"] = "foo"
-            with pytest.raises(ValueError):
+            msg = re.escape("invalid combination of [non_index_axes] on appending data "
+                            "[(1, ['A', 'B', 'C', 'D', 'foo'])] vs current table "
+                            "[(1, ['A', 'B', 'C', 'D'])]")
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
 
     def test_table_index_incompatible_dtypes(self, setup_path):
@@ -2051,7 +2088,8 @@ def test_table_index_incompatible_dtypes(self, setup_path):
 
         with ensure_clean_store(setup_path) as store:
             store.put("frame", df1, format="table")
-            with pytest.raises(TypeError):
+            msg = re.escape("incompatible kind in col [integer - datetime64]")
+            with pytest.raises(TypeError, match=msg):
                 store.put("frame", df2, format="table", append=True)
 
     def test_table_values_dtypes_roundtrip(self, setup_path):
@@ -2066,7 +2104,13 @@ def test_table_values_dtypes_roundtrip(self, setup_path):
             tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes)
 
             # incompatible dtype
-            with pytest.raises(ValueError):
+            msg = re.escape("invalid combination of [values_axes] on appending data "
+                            "[name->values_block_0,cname->values_block_0,"
+                            "dtype->float64,kind->float,shape->(1, 3)] vs "
+                            "current table [name->values_block_0,"
+                            "cname->values_block_0,dtype->int64,kind->integer,"
+                            "shape->None]")
+            with pytest.raises(ValueError, match=msg):
                 store.append("df_i8", df1)
 
             # check creation/storage/retrieval of float32 (a bit hacky to
@@ -2142,7 +2186,8 @@ def test_unimplemented_dtypes_table_columns(self, setup_path):
             for n, f in dtypes:
                 df = tm.makeDataFrame()
                 df[n] = f
-                with pytest.raises(TypeError):
+                msg = re.escape(f"[{n}] is not implemented as a table column")
+                with pytest.raises(TypeError, match=msg):
                     store.append(f"df1_{n}", df)
 
         # frame
@@ -2154,7 +2199,8 @@ def test_unimplemented_dtypes_table_columns(self, setup_path):
 
         with ensure_clean_store(setup_path) as store:
             # this fails because we have a date in the object block......
-            with pytest.raises(TypeError):
+            msg = "object of type 'int' has no len()"
+            with pytest.raises(TypeError, match=msg):
                 store.append("df_unimplemented", df)
 
     def test_calendar_roundtrip_issue(self, setup_path):
@@ -2287,14 +2333,19 @@ def test_invalid_terms(self, setup_path):
                 store.put("df", df, format="table")
 
                 # some invalid terms
-                with pytest.raises(TypeError):
+                msg = re.escape("__init__() missing 1 required positional argument: "
+                                "'where'")
+                with pytest.raises(TypeError, match=msg):
                     Term()
 
                 # more invalid
-                with pytest.raises(ValueError):
+                msg = re.escape("cannot process expression [df.index[3]], "
+                                "[2000-01-06 00:00:00] is not a valid condition")
+                with pytest.raises(ValueError, match=msg):
                     store.select("df", "df.index[3]")
 
-                with pytest.raises(SyntaxError):
+                msg = "invalid syntax"
+                with pytest.raises(SyntaxError, match=msg):
                     store.select("df", "index>")
 
         # from the docs
@@ -2321,7 +2372,12 @@ def test_invalid_terms(self, setup_path):
             )
             dfq.to_hdf(path, "dfq", format="table")
 
-            with pytest.raises(ValueError):
+            msg = (r"The passed where expression: A>0 or C>0\n\s*"
+                   r"contains an invalid variable reference\n\s*"
+                   r"all of the variable references must be a reference to\n\s*"
+                   r"an axis \(e.g. 'index' or 'columns'\), or a data_column\n\s*"
+                   r"The currently defined references are: index,columns\n")
+            with pytest.raises(ValueError, match=msg):
                 read_hdf(path, "dfq", where="A>0 or C>0")
 
     def test_same_name_scoping(self, setup_path):
@@ -2917,10 +2973,11 @@ def test_select_iterator(self, setup_path):
             df = tm.makeTimeDataFrame(500)
             df.to_hdf(path, "df_non_table")
 
-            with pytest.raises(TypeError):
+            msg = "can only use an iterator or chunksize on a table"
+            with pytest.raises(TypeError, match=msg):
                 read_hdf(path, "df_non_table", chunksize=100)
 
-            with pytest.raises(TypeError):
+            with pytest.raises(TypeError, match=msg):
                 read_hdf(path, "df_non_table", iterator=True)
 
         with ensure_clean_path(setup_path) as path:
@@ -3264,7 +3321,8 @@ def test_frame_select(self, setup_path):
             # invalid terms
             df = tm.makeTimeDataFrame()
             store.append("df_time", df)
-            with pytest.raises(ValueError):
+            msg = "could not convert string to Timestamp"
+            with pytest.raises(ValueError, match=msg):
                 store.select("df_time", "index>0")
 
             # can't select if not written as table
@@ -3311,7 +3369,8 @@ def test_frame_select_complex(self, setup_path):
             tm.assert_frame_equal(result, expected)
 
             # invert not implemented in numexpr :(
-            with pytest.raises(NotImplementedError):
+            msg = "cannot use an invert condition when passing to numexpr"
+            with pytest.raises(NotImplementedError, match=msg):
                 store.select("df", '~(string="bar")')
 
             # invert ok for filters
@@ -3397,12 +3456,13 @@ def test_invalid_filtering(self, setup_path):
         with ensure_clean_store(setup_path) as store:
             store.put("df", df, format="table")
 
+            msg = "unable to collapse Joint Filters"
             # not implemented
-            with pytest.raises(NotImplementedError):
+            with pytest.raises(NotImplementedError, match=msg):
                 store.select("df", "columns=['A'] | columns=['B']")
 
             # in theory we could deal with this
-            with pytest.raises(NotImplementedError):
+            with pytest.raises(NotImplementedError, match=msg):
                 store.select("df", "columns=['A','B'] & columns=['C']")
 
     def test_string_select(self, setup_path):
@@ -3467,7 +3527,10 @@ def test_read_column(self, setup_path):
             ):
                 store.select_column("df", "foo")
 
-            with pytest.raises(Exception):
+            msg = re.escape(
+                "select_column() got an unexpected keyword argument 'where'"
+            )
+            with pytest.raises(TypeError, match=msg):
                 store.select_column("df", "index", where=["index>5"])
 
             # valid
@@ -3476,7 +3539,11 @@ def test_read_column(self, setup_path):
             assert isinstance(result, Series)
 
             # not a data indexable column
-            with pytest.raises(ValueError):
+            msg = re.escape(
+                "column [values_block_0] can not be extracted individually; "
+                "it is not data indexable"
+            )
+            with pytest.raises(ValueError, match=msg):
                 store.select_column("df", "values_block_0")
 
             # a data column
@@ -3587,16 +3654,17 @@ def test_coordinates(self, setup_path):
             tm.assert_frame_equal(result, expected)
 
             # invalid
-            with pytest.raises(ValueError):
+            msg = "cannot process expression"
+            with pytest.raises(ValueError, match=msg):
                 store.select("df", where=np.arange(len(df), dtype="float64"))
 
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.select("df", where=np.arange(len(df) + 1))
 
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.select("df", where=np.arange(len(df)), start=5)
 
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.select("df", where=np.arange(len(df)), start=5, stop=10)
 
             # selection with filter
@@ -3633,15 +3701,18 @@ def test_append_to_multiple(self, setup_path):
         with ensure_clean_store(setup_path) as store:
 
             # exceptions
-            with pytest.raises(ValueError):
+            msg = "append_to_multiple requires a selector that is in passed dict"
+            with pytest.raises(ValueError, match=msg):
                 store.append_to_multiple(
                     {"df1": ["A", "B"], "df2": None}, df, selector="df3"
                 )
 
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.append_to_multiple({"df1": None, "df2": None}, df, selector="df3")
 
-            with pytest.raises(ValueError):
+            msg = ("append_to_multiple must have a dictionary specified as the way to "
+                   "split the value")
+            with pytest.raises(ValueError, match=msg):
                 store.append_to_multiple("df1", df, "df1")
 
             # regular operation
@@ -3687,7 +3758,9 @@ def test_append_to_multiple_dropna_false(self, setup_path):
                 {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False
             )
 
-            with pytest.raises(ValueError):
+            # TODO Update error message to desired message for this case
+            msg = "Cannot select as multiple after appending with dropna=False"
+            with pytest.raises(ValueError, match=msg):
                 store.select_as_multiple(["df1a", "df2a"])
 
             assert not store.select("df1a").index.equals(store.select("df2a").index)
@@ -3727,18 +3800,19 @@ def test_select_as_multiple(self, setup_path):
 
         with ensure_clean_store(setup_path) as store:
 
+            msg = "keys must be a list/tuple"
             # no tables stored
-            with pytest.raises(Exception):
+            with pytest.raises(TypeError, match=msg):
                 store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1")
 
             store.append("df1", df1, data_columns=["A", "B"])
             store.append("df2", df2)
 
             # exceptions
-            with pytest.raises(Exception):
+            with pytest.raises(TypeError, match=msg):
                 store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1")
 
-            with pytest.raises(Exception):
+            with pytest.raises(TypeError, match=msg):
                 store.select_as_multiple([None], where=["A>0", "B>0"], selector="df1")
 
             msg = "'No object named df3 in the file'"
@@ -3784,7 +3858,8 @@ def test_select_as_multiple(self, setup_path):
 
             # test exception for diff rows
             store.append("df3", tm.makeTimeDataFrame(nper=50))
-            with pytest.raises(ValueError):
+            msg = "all tables must have exactly the same nrows!"
+            with pytest.raises(ValueError, match=msg):
                 store.select_as_multiple(
                     ["df1", "df3"], where=["A>0", "B>0"], selector="df1"
                 )
@@ -4019,11 +4094,11 @@ def test_multiple_open_close(self, setup_path):
         with ensure_clean_path(setup_path) as path:
 
             if pytables._table_file_open_policy_is_strict:
-
                 # multiples
                 store1 = HDFStore(path)
-
-                with pytest.raises(ValueError):
+                msg = (r"The file [\S]* is already opened\.  Please close it before "
+                       r"reopening in write mode\.")
+                with pytest.raises(ValueError, match=msg):
                     HDFStore(path)
 
                 store1.close()
@@ -4086,42 +4161,44 @@ def test_multiple_open_close(self, setup_path):
             store = HDFStore(path)
             store.close()
 
-            with pytest.raises(ClosedFileError):
+            msg = r"[\S]* file is not open!"
+            with pytest.raises(ClosedFileError, match=msg):
                 store.keys()
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 "df" in store
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 len(store)
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store["df"]
 
-            with pytest.raises(AttributeError):
-                store.df
-
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.select("df")
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.get("df")
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.append("df2", df)
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.put("df3", df)
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.get_storer("df2")
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.remove("df2")
 
-            with pytest.raises(ClosedFileError, match="file is not open"):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.select("df")
 
+            msg = "'HDFStore' object has no attribute 'df'"
+            with pytest.raises(AttributeError, match=msg):
+                store.df
+
     def test_pytables_native_read(self, datapath, setup_path):
         with ensure_clean_store(
             datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r"
@@ -4338,7 +4415,8 @@ def test_append_with_diff_col_name_types_raises_value_error(self, setup_path):
             store.append(name, df)
 
             for d in (df2, df3, df4, df5):
-                with pytest.raises(ValueError):
+                msg = re.escape("cannot match existing table structure for [0] on appending data")
+                with pytest.raises(ValueError, match=msg):
                     store.append(name, d)
 
     def test_query_with_nested_special_character(self, setup_path):
@@ -4460,7 +4538,8 @@ def test_categorical(self, setup_path):
             df3 = df.copy()
             df3["s"] = df3["s"].cat.remove_unused_categories()
 
-            with pytest.raises(ValueError):
+            msg = "cannot append a categorical with different categories to the existing"
+            with pytest.raises(ValueError, match=msg):
                 store.append("df3", df3)
 
             # Remove, and make sure meta data is removed (its a recursive
@@ -4529,7 +4608,8 @@ def test_duplicate_column_name(self, setup_path):
         df = DataFrame(columns=["a", "a"], data=[[0, 0]])
 
         with ensure_clean_path(setup_path) as path:
-            with pytest.raises(ValueError):
+            msg = "Columns index has to be unique for fixed format"
+            with pytest.raises(ValueError, match=msg):
                 df.to_hdf(path, "df", format="fixed")
 
             df.to_hdf(path, "df", format="table")
@@ -4655,24 +4735,30 @@ def test_read_hdf_errors(self, setup_path):
         df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
 
         with ensure_clean_path(setup_path) as path:
-            with pytest.raises(IOError):
+            msg = r"File [\S]* does not exist"
+            with pytest.raises(IOError, match=msg):
                 read_hdf(path, "key")
 
             df.to_hdf(path, "df")
             store = HDFStore(path, mode="r")
             store.close()
 
-            with pytest.raises(IOError):
+            msg = "The HDFStore must be open for reading."
+            with pytest.raises(IOError, match=msg):
                 read_hdf(store, "df")
 
     def test_read_hdf_generic_buffer_errors(self):
-        with pytest.raises(NotImplementedError):
+        msg = "Support for generic buffers has not been implemented."
+        with pytest.raises(NotImplementedError, match=msg):
             read_hdf(BytesIO(b""), "df")
 
     def test_invalid_complib(self, setup_path):
         df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
         with tm.ensure_clean(setup_path) as path:
-            with pytest.raises(ValueError):
+            msg = re.escape("complib only supports ['zlib', 'lzo', 'bzip2', 'blosc', "
+                            "'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', "
+                            "'blosc:zlib', 'blosc:zstd'] compression.")
+            with pytest.raises(ValueError, match=msg):
                 df.to_hdf(path, "df", complib="foolib")
 
     # GH10443
@@ -4688,7 +4774,8 @@ def test_read_nokey(self, setup_path):
             tm.assert_frame_equal(df, reread)
             df.to_hdf(path, "df2", mode="a")
 
-            with pytest.raises(ValueError):
+            msg = "key must be provided when HDF5 file contains multiple datasets."
+            with pytest.raises(ValueError, match=msg):
                 read_hdf(path)
 
     def test_read_nokey_table(self, setup_path):
@@ -4701,15 +4788,16 @@ def test_read_nokey_table(self, setup_path):
             tm.assert_frame_equal(df, reread)
             df.to_hdf(path, "df2", mode="a", format="table")
 
-            with pytest.raises(ValueError):
+            msg = "key must be provided when HDF5 file contains multiple datasets."
+            with pytest.raises(ValueError, match=msg):
                 read_hdf(path)
 
     def test_read_nokey_empty(self, setup_path):
         with ensure_clean_path(setup_path) as path:
             store = HDFStore(path)
             store.close()
-
-            with pytest.raises(ValueError):
+            msg = re.escape("Dataset(s) incompatible with Pandas data types, not table, or no datasets found in HDF5 file.")
+            with pytest.raises(ValueError, match=msg):
                 read_hdf(path)
 
     def test_read_from_pathlib_path(self, setup_path):
@@ -4788,14 +4876,16 @@ def test_query_compare_column_type(self, setup_path):
                 # non strings to string column always fail
                 for v in [2.1, True, Timestamp("2014-01-01"), pd.Timedelta(1, "s")]:
                     query = f"date {op} v"
-                    with pytest.raises(TypeError):
+                    msg = f"Cannot compare {v} of type {type(v)} to string column"
+                    with pytest.raises(TypeError, match=msg):
                         store.select("test", where=query)
 
                 # strings to other columns must be convertible to type
                 v = "a"
                 for col in ["int", "float", "real_date"]:
                     query = f"{col} {op} v"
-                    with pytest.raises(ValueError):
+                    msg = "could not convert string to "
+                    with pytest.raises(ValueError, match=msg):
                         store.select("test", where=query)
 
                 for v, col in zip(

From d52c073882f4581748c6e3e1d8fa028dc7deb21b Mon Sep 17 00:00:00 2001
From: moink <theresa.robinson@gmail.com>
Date: Mon, 21 Dec 2020 13:22:44 +0100
Subject: [PATCH 2/4] TEST: GH30999 Add match=msg to all "with pytest.raises"
 in pandas/tests/io/pytables/test_store.py

---
 pandas/tests/io/pytables/test_store.py | 302 ++++++++++++++++++-------
 1 file changed, 216 insertions(+), 86 deletions(-)

diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index b35414724d946..b2ccd691453bb 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -483,11 +483,12 @@ def test_mode(self, setup_path):
 
         def check(mode):
 
+            msg = r"[\S]* does not exist"
             with ensure_clean_path(setup_path) as path:
 
                 # constructor
                 if mode in ["r", "r+"]:
-                    with pytest.raises(IOError):
+                    with pytest.raises(IOError, match=msg):
                         HDFStore(path, mode=mode)
 
                 else:
@@ -499,7 +500,7 @@ def check(mode):
 
                 # context
                 if mode in ["r", "r+"]:
-                    with pytest.raises(IOError):
+                    with pytest.raises(IOError, match=msg):
                         with HDFStore(path, mode=mode) as store:
                             pass
                 else:
@@ -510,7 +511,7 @@ def check(mode):
 
                 # conv write
                 if mode in ["r", "r+"]:
-                    with pytest.raises(IOError):
+                    with pytest.raises(IOError, match=msg):
                         df.to_hdf(path, "df", mode=mode)
                     df.to_hdf(path, "df", mode="w")
                 else:
@@ -549,8 +550,12 @@ def test_reopen_handle(self, setup_path):
             store = HDFStore(path, mode="a")
             store["a"] = tm.makeTimeSeries()
 
+            msg = (
+                r"Re-opening the file \[[\S]*\] with mode \[a\] will delete the "
+                "current file!"
+            )
             # invalid mode change
-            with pytest.raises(PossibleDataLossError):
+            with pytest.raises(PossibleDataLossError, match=msg):
                 store.open("w")
 
             store.close()
@@ -713,7 +718,8 @@ def test_getattr(self, setup_path):
 
             # errors
             for x in ["d", "mode", "path", "handle", "complib"]:
-                with pytest.raises(AttributeError):
+                msg = f"'HDFStore' object has no attribute '{x}'"
+                with pytest.raises(AttributeError, match=msg):
                     getattr(store, x)
 
             # not stores
@@ -734,17 +740,18 @@ def test_put(self, setup_path):
             store.put("c", df[:10], format="table")
 
             # not OK, not a table
-            with pytest.raises(ValueError):
+            msg = "Can only append to Tables"
+            with pytest.raises(ValueError, match=msg):
                 store.put("b", df[10:], append=True)
 
             # node does not currently exist, test _is_table_type returns False
             # in this case
             _maybe_remove(store, "f")
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.put("f", df[10:], append=True)
 
             # can't put to a table (use append instead)
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.put("c", df[10:], append=True)
 
             # overwrite table
@@ -787,7 +794,8 @@ def test_put_compression(self, setup_path):
             tm.assert_frame_equal(store["c"], df)
 
             # can't compress if format='fixed'
-            with pytest.raises(ValueError):
+            msg = "Compression not supported on Fixed format stores"
+            with pytest.raises(ValueError, match=msg):
                 store.put("b", df, format="fixed", complib="zlib")
 
     @td.skip_if_windows_python_3
@@ -797,7 +805,8 @@ def test_put_compression_blosc(self, setup_path):
         with ensure_clean_store(setup_path) as store:
 
             # can't compress if format='fixed'
-            with pytest.raises(ValueError):
+            msg = "Compression not supported on Fixed format stores"
+            with pytest.raises(ValueError, match=msg):
                 store.put("b", df, format="fixed", complib="blosc")
 
             store.put("c", df, format="table", complib="blosc")
@@ -1329,7 +1338,11 @@ def test_append_frame_column_oriented(self, setup_path):
             tm.assert_frame_equal(expected, result)
 
             # this isn't supported
-            with pytest.raises(TypeError):
+            msg = re.escape(
+                "passing a filterable condition to a non-table indexer "
+                "[Filter: Not Initialized]"
+            )
+            with pytest.raises(TypeError, match=msg):
                 store.select("df1", "columns=A and index>df.index[4]")
 
     def test_append_with_different_block_ordering(self, setup_path):
@@ -1367,12 +1380,18 @@ def test_append_with_different_block_ordering(self, setup_path):
 
             # store additional fields in different blocks
             df["int16_2"] = Series([1] * len(df), dtype="int16")
-            with pytest.raises(ValueError):
+            msg = re.escape(
+                "cannot match existing table structure for [int16] on " "appending data"
+            )
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
 
             # store multiple additional fields in different blocks
             df["float_3"] = Series([1.0] * len(df), dtype="float64")
-            with pytest.raises(ValueError):
+            msg = re.escape(
+                "cannot match existing table structure for [A,B] on " "appending data"
+            )
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
 
     def test_append_with_strings(self, setup_path):
@@ -1410,7 +1429,14 @@ def check_col(key, name, size):
                 df_new = DataFrame(
                     [[124, "abcdefqhij"], [346, "abcdefghijklmnopqrtsuvwxyz"]]
                 )
-                with pytest.raises(ValueError):
+                msg = (
+                    r"Trying to store a string with len \[26\] in "
+                    r"\[values_block_1\] column but\n"
+                    r"this column has a limit of \[15\]!\n"
+                    "Consider using min_itemsize to preset the sizes on these "
+                    "columns"
+                )
+                with pytest.raises(ValueError, match=msg):
                     store.append("df_new", df_new)
 
                 # min_itemsize on Series index (GH 11412)
@@ -1488,7 +1514,10 @@ def check_col(key, name, size):
             # invalid min_itemsize keys
             df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"])
             _maybe_remove(store, "df")
-            with pytest.raises(ValueError):
+            msg = re.escape(
+                "min_itemsize has the key [foo] which is not an axis or " "data_column"
+            )
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df, min_itemsize={"foo": 20, "foobar": 20})
 
     def test_append_with_empty_string(self, setup_path):
@@ -1718,7 +1747,8 @@ def col(t, column):
                 # try to index a non-table
                 _maybe_remove(store, "f2")
                 store.put("f2", df)
-                with pytest.raises(TypeError):
+                msg = "cannot create table index on a Fixed format store"
+                with pytest.raises(TypeError, match=msg):
                     store.create_table_index("f2")
 
     def test_create_table_index_data_columns_argument(self, setup_path):
@@ -1800,9 +1830,15 @@ def test_column_multiindex(self, setup_path):
                 store["df1"], expected, check_index_type=True, check_column_type=True
             )
 
-            with pytest.raises(ValueError):
+            msg = re.escape(
+                "cannot use a multi-index on axis [1] with data_columns " "['A']"
+            )
+            with pytest.raises(ValueError, match=msg):
                 store.put("df2", df, format="table", data_columns=["A"])
-            with pytest.raises(ValueError):
+            msg = re.escape(
+                "cannot use a multi-index on axis [1] with data_columns" " True"
+            )
+            with pytest.raises(ValueError, match=msg):
                 store.put("df3", df, format="table", data_columns=True)
 
         # appending multi-column on existing table (see GH 6167)
@@ -1874,7 +1910,8 @@ def make_index(names=None):
                 columns=["a", "b"],
                 index=make_index(["date", "a", "t"]),
             )
-            with pytest.raises(ValueError):
+            msg = "duplicate names/columns in the multi-index when storing as a table"
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
 
             # dup within level
@@ -1884,7 +1921,7 @@ def make_index(names=None):
                 columns=["a", "b"],
                 index=make_index(["date", "date", "date"]),
             )
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
 
             # fully names
@@ -1945,9 +1982,17 @@ def test_pass_spec_to_storer(self, setup_path):
 
         with ensure_clean_store(setup_path) as store:
             store.put("df", df)
-            with pytest.raises(TypeError):
+            msg = (
+                "cannot pass a column specification when reading a Fixed format "
+                "store. this store must be selected in its entirety"
+            )
+            with pytest.raises(TypeError, match=msg):
                 store.select("df", columns=["A"])
-            with pytest.raises(TypeError):
+            msg = (
+                "cannot pass a where specification when reading from a Fixed "
+                "format store. this store must be selected in its entirety"
+            )
+            with pytest.raises(TypeError, match=msg):
                 store.select("df", where=[("columns=A")])
 
     def test_append_misc(self, setup_path):
@@ -2010,13 +2055,14 @@ def test_append_raise(self, setup_path):
             df = tm.makeDataFrame()
             df["invalid"] = [["a"]] * len(df)
             assert df.dtypes["invalid"] == np.object_
-            with pytest.raises(TypeError):
+            msg = re.escape("object of type 'int' has no len()")
+            with pytest.raises(TypeError, match=msg):
                 store.append("df", df)
 
             # multiple invalid columns
             df["invalid2"] = [["a"]] * len(df)
             df["invalid3"] = [["a"]] * len(df)
-            with pytest.raises(TypeError):
+            with pytest.raises(TypeError, match=msg):
                 store.append("df", df)
 
             # datetime with embedded nans as object
@@ -2026,15 +2072,21 @@ def test_append_raise(self, setup_path):
             s[0:5] = np.nan
             df["invalid"] = s
             assert df.dtypes["invalid"] == np.object_
-            with pytest.raises(TypeError):
+            msg = "too many timezones in this block, create separate data columns"
+            with pytest.raises(TypeError, match=msg):
                 store.append("df", df)
 
             # directly ndarray
-            with pytest.raises(TypeError):
+            msg = "value must be None, Series, or DataFrame"
+            with pytest.raises(TypeError, match=msg):
                 store.append("df", np.arange(10))
 
             # series directly
-            with pytest.raises(TypeError):
+            msg = re.escape(
+                "cannot properly create the storer for: "
+                "[group->df,value-><class 'pandas.core.series.Series'>]"
+            )
+            with pytest.raises(TypeError, match=msg):
                 store.append("df", Series(np.arange(10)))
 
             # appending an incompatible table
@@ -2042,7 +2094,12 @@ def test_append_raise(self, setup_path):
             store.append("df", df)
 
             df["foo"] = "foo"
-            with pytest.raises(ValueError):
+            msg = re.escape(
+                "invalid combination of [non_index_axes] on appending data "
+                "[(1, ['A', 'B', 'C', 'D', 'foo'])] vs current table "
+                "[(1, ['A', 'B', 'C', 'D'])]"
+            )
+            with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
 
     def test_table_index_incompatible_dtypes(self, setup_path):
@@ -2051,7 +2108,8 @@ def test_table_index_incompatible_dtypes(self, setup_path):
 
         with ensure_clean_store(setup_path) as store:
             store.put("frame", df1, format="table")
-            with pytest.raises(TypeError):
+            msg = re.escape("incompatible kind in col [integer - datetime64]")
+            with pytest.raises(TypeError, match=msg):
                 store.put("frame", df2, format="table", append=True)
 
     def test_table_values_dtypes_roundtrip(self, setup_path):
@@ -2066,7 +2124,15 @@ def test_table_values_dtypes_roundtrip(self, setup_path):
             tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes)
 
             # incompatible dtype
-            with pytest.raises(ValueError):
+            msg = re.escape(
+                "invalid combination of [values_axes] on appending data "
+                "[name->values_block_0,cname->values_block_0,"
+                "dtype->float64,kind->float,shape->(1, 3)] vs "
+                "current table [name->values_block_0,"
+                "cname->values_block_0,dtype->int64,kind->integer,"
+                "shape->None]"
+            )
+            with pytest.raises(ValueError, match=msg):
                 store.append("df_i8", df1)
 
             # check creation/storage/retrieval of float32 (a bit hacky to
@@ -2142,7 +2208,8 @@ def test_unimplemented_dtypes_table_columns(self, setup_path):
             for n, f in dtypes:
                 df = tm.makeDataFrame()
                 df[n] = f
-                with pytest.raises(TypeError):
+                msg = re.escape(f"[{n}] is not implemented as a table column")
+                with pytest.raises(TypeError, match=msg):
                     store.append(f"df1_{n}", df)
 
         # frame
@@ -2154,7 +2221,8 @@ def test_unimplemented_dtypes_table_columns(self, setup_path):
 
         with ensure_clean_store(setup_path) as store:
             # this fails because we have a date in the object block......
-            with pytest.raises(TypeError):
+            msg = "object of type 'int' has no len()"
+            with pytest.raises(TypeError, match=msg):
                 store.append("df_unimplemented", df)
 
     def test_calendar_roundtrip_issue(self, setup_path):
@@ -2287,14 +2355,22 @@ def test_invalid_terms(self, setup_path):
                 store.put("df", df, format="table")
 
                 # some invalid terms
-                with pytest.raises(TypeError):
+                msg = re.escape(
+                    "__init__() missing 1 required positional argument: " "'where'"
+                )
+                with pytest.raises(TypeError, match=msg):
                     Term()
 
                 # more invalid
-                with pytest.raises(ValueError):
+                msg = re.escape(
+                    "cannot process expression [df.index[3]], "
+                    "[2000-01-06 00:00:00] is not a valid condition"
+                )
+                with pytest.raises(ValueError, match=msg):
                     store.select("df", "df.index[3]")
 
-                with pytest.raises(SyntaxError):
+                msg = "invalid syntax"
+                with pytest.raises(SyntaxError, match=msg):
                     store.select("df", "index>")
 
         # from the docs
@@ -2321,7 +2397,14 @@ def test_invalid_terms(self, setup_path):
             )
             dfq.to_hdf(path, "dfq", format="table")
 
-            with pytest.raises(ValueError):
+            msg = (
+                r"The passed where expression: A>0 or C>0\n\s*"
+                r"contains an invalid variable reference\n\s*"
+                r"all of the variable references must be a reference to\n\s*"
+                r"an axis \(e.g. 'index' or 'columns'\), or a data_column\n\s*"
+                r"The currently defined references are: index,columns\n"
+            )
+            with pytest.raises(ValueError, match=msg):
                 read_hdf(path, "dfq", where="A>0 or C>0")
 
     def test_same_name_scoping(self, setup_path):
@@ -2917,10 +3000,11 @@ def test_select_iterator(self, setup_path):
             df = tm.makeTimeDataFrame(500)
             df.to_hdf(path, "df_non_table")
 
-            with pytest.raises(TypeError):
+            msg = "can only use an iterator or chunksize on a table"
+            with pytest.raises(TypeError, match=msg):
                 read_hdf(path, "df_non_table", chunksize=100)
 
-            with pytest.raises(TypeError):
+            with pytest.raises(TypeError, match=msg):
                 read_hdf(path, "df_non_table", iterator=True)
 
         with ensure_clean_path(setup_path) as path:
@@ -3264,7 +3348,8 @@ def test_frame_select(self, setup_path):
             # invalid terms
             df = tm.makeTimeDataFrame()
             store.append("df_time", df)
-            with pytest.raises(ValueError):
+            msg = "could not convert string to Timestamp"
+            with pytest.raises(ValueError, match=msg):
                 store.select("df_time", "index>0")
 
             # can't select if not written as table
@@ -3311,7 +3396,8 @@ def test_frame_select_complex(self, setup_path):
             tm.assert_frame_equal(result, expected)
 
             # invert not implemented in numexpr :(
-            with pytest.raises(NotImplementedError):
+            msg = "cannot use an invert condition when passing to numexpr"
+            with pytest.raises(NotImplementedError, match=msg):
                 store.select("df", '~(string="bar")')
 
             # invert ok for filters
@@ -3397,12 +3483,13 @@ def test_invalid_filtering(self, setup_path):
         with ensure_clean_store(setup_path) as store:
             store.put("df", df, format="table")
 
+            msg = "unable to collapse Joint Filters"
             # not implemented
-            with pytest.raises(NotImplementedError):
+            with pytest.raises(NotImplementedError, match=msg):
                 store.select("df", "columns=['A'] | columns=['B']")
 
             # in theory we could deal with this
-            with pytest.raises(NotImplementedError):
+            with pytest.raises(NotImplementedError, match=msg):
                 store.select("df", "columns=['A','B'] & columns=['C']")
 
     def test_string_select(self, setup_path):
@@ -3467,7 +3554,10 @@ def test_read_column(self, setup_path):
             ):
                 store.select_column("df", "foo")
 
-            with pytest.raises(Exception):
+            msg = re.escape(
+                "select_column() got an unexpected keyword argument 'where'"
+            )
+            with pytest.raises(TypeError, match=msg):
                 store.select_column("df", "index", where=["index>5"])
 
             # valid
@@ -3476,7 +3566,11 @@ def test_read_column(self, setup_path):
             assert isinstance(result, Series)
 
             # not a data indexable column
-            with pytest.raises(ValueError):
+            msg = re.escape(
+                "column [values_block_0] can not be extracted individually; "
+                "it is not data indexable"
+            )
+            with pytest.raises(ValueError, match=msg):
                 store.select_column("df", "values_block_0")
 
             # a data column
@@ -3587,16 +3681,17 @@ def test_coordinates(self, setup_path):
             tm.assert_frame_equal(result, expected)
 
             # invalid
-            with pytest.raises(ValueError):
+            msg = "cannot process expression"
+            with pytest.raises(ValueError, match=msg):
                 store.select("df", where=np.arange(len(df), dtype="float64"))
 
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.select("df", where=np.arange(len(df) + 1))
 
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.select("df", where=np.arange(len(df)), start=5)
 
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.select("df", where=np.arange(len(df)), start=5, stop=10)
 
             # selection with filter
@@ -3633,15 +3728,20 @@ def test_append_to_multiple(self, setup_path):
         with ensure_clean_store(setup_path) as store:
 
             # exceptions
-            with pytest.raises(ValueError):
+            msg = "append_to_multiple requires a selector that is in passed dict"
+            with pytest.raises(ValueError, match=msg):
                 store.append_to_multiple(
                     {"df1": ["A", "B"], "df2": None}, df, selector="df3"
                 )
 
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 store.append_to_multiple({"df1": None, "df2": None}, df, selector="df3")
 
-            with pytest.raises(ValueError):
+            msg = (
+                "append_to_multiple must have a dictionary specified as the way to "
+                "split the value"
+            )
+            with pytest.raises(ValueError, match=msg):
                 store.append_to_multiple("df1", df, "df1")
 
             # regular operation
@@ -3687,7 +3787,9 @@ def test_append_to_multiple_dropna_false(self, setup_path):
                 {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False
             )
 
-            with pytest.raises(ValueError):
+            # TODO Update error message to desired message for this case
+            msg = "Cannot select as multiple after appending with dropna=False"
+            with pytest.raises(ValueError, match=msg):
                 store.select_as_multiple(["df1a", "df2a"])
 
             assert not store.select("df1a").index.equals(store.select("df2a").index)
@@ -3727,18 +3829,19 @@ def test_select_as_multiple(self, setup_path):
 
         with ensure_clean_store(setup_path) as store:
 
+            msg = "keys must be a list/tuple"
             # no tables stored
-            with pytest.raises(Exception):
+            with pytest.raises(TypeError, match=msg):
                 store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1")
 
             store.append("df1", df1, data_columns=["A", "B"])
             store.append("df2", df2)
 
             # exceptions
-            with pytest.raises(Exception):
+            with pytest.raises(TypeError, match=msg):
                 store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1")
 
-            with pytest.raises(Exception):
+            with pytest.raises(TypeError, match=msg):
                 store.select_as_multiple([None], where=["A>0", "B>0"], selector="df1")
 
             msg = "'No object named df3 in the file'"
@@ -3784,7 +3887,8 @@ def test_select_as_multiple(self, setup_path):
 
             # test exception for diff rows
             store.append("df3", tm.makeTimeDataFrame(nper=50))
-            with pytest.raises(ValueError):
+            msg = "all tables must have exactly the same nrows!"
+            with pytest.raises(ValueError, match=msg):
                 store.select_as_multiple(
                     ["df1", "df3"], where=["A>0", "B>0"], selector="df1"
                 )
@@ -4019,11 +4123,13 @@ def test_multiple_open_close(self, setup_path):
         with ensure_clean_path(setup_path) as path:
 
             if pytables._table_file_open_policy_is_strict:
-
                 # multiples
                 store1 = HDFStore(path)
-
-                with pytest.raises(ValueError):
+                msg = (
+                    r"The file [\S]* is already opened\.  Please close it before "
+                    r"reopening in write mode\."
+                )
+                with pytest.raises(ValueError, match=msg):
                     HDFStore(path)
 
                 store1.close()
@@ -4086,42 +4192,44 @@ def test_multiple_open_close(self, setup_path):
             store = HDFStore(path)
             store.close()
 
-            with pytest.raises(ClosedFileError):
+            msg = r"[\S]* file is not open!"
+            with pytest.raises(ClosedFileError, match=msg):
                 store.keys()
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 "df" in store
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 len(store)
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store["df"]
 
-            with pytest.raises(AttributeError):
-                store.df
-
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.select("df")
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.get("df")
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.append("df2", df)
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.put("df3", df)
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.get_storer("df2")
 
-            with pytest.raises(ClosedFileError):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.remove("df2")
 
-            with pytest.raises(ClosedFileError, match="file is not open"):
+            with pytest.raises(ClosedFileError, match=msg):
                 store.select("df")
 
+            msg = "'HDFStore' object has no attribute 'df'"
+            with pytest.raises(AttributeError, match=msg):
+                store.df
+
     def test_pytables_native_read(self, datapath, setup_path):
         with ensure_clean_store(
             datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r"
@@ -4338,7 +4446,10 @@ def test_append_with_diff_col_name_types_raises_value_error(self, setup_path):
             store.append(name, df)
 
             for d in (df2, df3, df4, df5):
-                with pytest.raises(ValueError):
+                msg = re.escape(
+                    "cannot match existing table structure for [0] on appending data"
+                )
+                with pytest.raises(ValueError, match=msg):
                     store.append(name, d)
 
     def test_query_with_nested_special_character(self, setup_path):
@@ -4460,7 +4571,10 @@ def test_categorical(self, setup_path):
             df3 = df.copy()
             df3["s"] = df3["s"].cat.remove_unused_categories()
 
-            with pytest.raises(ValueError):
+            msg = (
+                "cannot append a categorical with different categories to the existing"
+            )
+            with pytest.raises(ValueError, match=msg):
                 store.append("df3", df3)
 
             # Remove, and make sure meta data is removed (its a recursive
@@ -4529,7 +4643,8 @@ def test_duplicate_column_name(self, setup_path):
         df = DataFrame(columns=["a", "a"], data=[[0, 0]])
 
         with ensure_clean_path(setup_path) as path:
-            with pytest.raises(ValueError):
+            msg = "Columns index has to be unique for fixed format"
+            with pytest.raises(ValueError, match=msg):
                 df.to_hdf(path, "df", format="fixed")
 
             df.to_hdf(path, "df", format="table")
@@ -4655,24 +4770,32 @@ def test_read_hdf_errors(self, setup_path):
         df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
 
         with ensure_clean_path(setup_path) as path:
-            with pytest.raises(IOError):
+            msg = r"File [\S]* does not exist"
+            with pytest.raises(IOError, match=msg):
                 read_hdf(path, "key")
 
             df.to_hdf(path, "df")
             store = HDFStore(path, mode="r")
             store.close()
 
-            with pytest.raises(IOError):
+            msg = "The HDFStore must be open for reading."
+            with pytest.raises(IOError, match=msg):
                 read_hdf(store, "df")
 
     def test_read_hdf_generic_buffer_errors(self):
-        with pytest.raises(NotImplementedError):
+        msg = "Support for generic buffers has not been implemented."
+        with pytest.raises(NotImplementedError, match=msg):
             read_hdf(BytesIO(b""), "df")
 
     def test_invalid_complib(self, setup_path):
         df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
         with tm.ensure_clean(setup_path) as path:
-            with pytest.raises(ValueError):
+            msg = re.escape(
+                "complib only supports ['zlib', 'lzo', 'bzip2', 'blosc', "
+                "'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', "
+                "'blosc:zlib', 'blosc:zstd'] compression."
+            )
+            with pytest.raises(ValueError, match=msg):
                 df.to_hdf(path, "df", complib="foolib")
 
     # GH10443
@@ -4688,7 +4811,8 @@ def test_read_nokey(self, setup_path):
             tm.assert_frame_equal(df, reread)
             df.to_hdf(path, "df2", mode="a")
 
-            with pytest.raises(ValueError):
+            msg = "key must be provided when HDF5 file contains multiple datasets."
+            with pytest.raises(ValueError, match=msg):
                 read_hdf(path)
 
     def test_read_nokey_table(self, setup_path):
@@ -4701,15 +4825,19 @@ def test_read_nokey_table(self, setup_path):
             tm.assert_frame_equal(df, reread)
             df.to_hdf(path, "df2", mode="a", format="table")
 
-            with pytest.raises(ValueError):
+            msg = "key must be provided when HDF5 file contains multiple datasets."
+            with pytest.raises(ValueError, match=msg):
                 read_hdf(path)
 
     def test_read_nokey_empty(self, setup_path):
         with ensure_clean_path(setup_path) as path:
             store = HDFStore(path)
             store.close()
-
-            with pytest.raises(ValueError):
+            msg = re.escape(
+                "Dataset(s) incompatible with Pandas data types, not table, or no "
+                "datasets found in HDF5 file."
+            )
+            with pytest.raises(ValueError, match=msg):
                 read_hdf(path)
 
     def test_read_from_pathlib_path(self, setup_path):
@@ -4788,14 +4916,16 @@ def test_query_compare_column_type(self, setup_path):
                 # non strings to string column always fail
                 for v in [2.1, True, Timestamp("2014-01-01"), pd.Timedelta(1, "s")]:
                     query = f"date {op} v"
-                    with pytest.raises(TypeError):
+                    msg = f"Cannot compare {v} of type {type(v)} to string column"
+                    with pytest.raises(TypeError, match=msg):
                         store.select("test", where=query)
 
                 # strings to other columns must be convertible to type
                 v = "a"
                 for col in ["int", "float", "real_date"]:
                     query = f"{col} {op} v"
-                    with pytest.raises(ValueError):
+                    msg = "could not convert string to "
+                    with pytest.raises(ValueError, match=msg):
                         store.select("test", where=query)
 
                 for v, col in zip(

From 920589f50f1f353e476c3cff592003db5c53e35d Mon Sep 17 00:00:00 2001
From: moink <theresa.robinson@gmail.com>
Date: Mon, 21 Dec 2020 14:23:32 +0100
Subject: [PATCH 3/4] Fix strings split in two and reformatted by black

---
 pandas/tests/io/pytables/test_store.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index b2ccd691453bb..02969b5e525ec 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -1381,7 +1381,7 @@ def test_append_with_different_block_ordering(self, setup_path):
             # store additional fields in different blocks
             df["int16_2"] = Series([1] * len(df), dtype="int16")
             msg = re.escape(
-                "cannot match existing table structure for [int16] on " "appending data"
+                "cannot match existing table structure for [int16] on appending data"
             )
             with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
@@ -1389,7 +1389,7 @@ def test_append_with_different_block_ordering(self, setup_path):
             # store multiple additional fields in different blocks
             df["float_3"] = Series([1.0] * len(df), dtype="float64")
             msg = re.escape(
-                "cannot match existing table structure for [A,B] on " "appending data"
+                "cannot match existing table structure for [A,B] on appending data"
             )
             with pytest.raises(ValueError, match=msg):
                 store.append("df", df)
@@ -1515,7 +1515,7 @@ def check_col(key, name, size):
             df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"])
             _maybe_remove(store, "df")
             msg = re.escape(
-                "min_itemsize has the key [foo] which is not an axis or " "data_column"
+                "min_itemsize has the key [foo] which is not an axis or data_column"
             )
             with pytest.raises(ValueError, match=msg):
                 store.append("df", df, min_itemsize={"foo": 20, "foobar": 20})
@@ -1831,12 +1831,12 @@ def test_column_multiindex(self, setup_path):
             )
 
             msg = re.escape(
-                "cannot use a multi-index on axis [1] with data_columns " "['A']"
+                "cannot use a multi-index on axis [1] with data_columns ['A']"
             )
             with pytest.raises(ValueError, match=msg):
                 store.put("df2", df, format="table", data_columns=["A"])
             msg = re.escape(
-                "cannot use a multi-index on axis [1] with data_columns" " True"
+                "cannot use a multi-index on axis [1] with data_columns True"
             )
             with pytest.raises(ValueError, match=msg):
                 store.put("df3", df, format="table", data_columns=True)
@@ -2356,7 +2356,7 @@ def test_invalid_terms(self, setup_path):
 
                 # some invalid terms
                 msg = re.escape(
-                    "__init__() missing 1 required positional argument: " "'where'"
+                    "__init__() missing 1 required positional argument: 'where'"
                 )
                 with pytest.raises(TypeError, match=msg):
                     Term()

From 4e41e6f641e5dfcf7d31588f30d2fde1ad1f3724 Mon Sep 17 00:00:00 2001
From: moink <theresa.robinson@gmail.com>
Date: Mon, 21 Dec 2020 15:08:10 +0100
Subject: [PATCH 4/4] TST: Fix match argument so it works on all platforms
 - was failing on Windows

---
 pandas/tests/io/pytables/test_store.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 02969b5e525ec..274efda55414c 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -4790,11 +4790,7 @@ def test_read_hdf_generic_buffer_errors(self):
     def test_invalid_complib(self, setup_path):
         df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
         with tm.ensure_clean(setup_path) as path:
-            msg = re.escape(
-                "complib only supports ['zlib', 'lzo', 'bzip2', 'blosc', "
-                "'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', "
-                "'blosc:zlib', 'blosc:zstd'] compression."
-            )
+            msg = r"complib only supports \[.*\] compression."
             with pytest.raises(ValueError, match=msg):
                 df.to_hdf(path, "df", complib="foolib")