From 51859b5e7588de7f1ea50c686fcc54976f70ff7b Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Tue, 22 Nov 2022 15:16:51 -0800 Subject: [PATCH 1/6] feat: Adding and fixing issues for pylint check Fixing issues for unsupported-membership-test so that we can add the PyLint check to the project --- pandas/io/parsers/c_parser_wrapper.py | 9 +++++-- pandas/tests/extension/list/test_list.py | 2 +- pandas/tests/indexes/multi/test_indexing.py | 2 +- pandas/tests/io/formats/style/test_format.py | 8 +++---- pandas/tests/io/formats/style/test_html.py | 12 ++++++---- pandas/tests/io/formats/style/test_style.py | 24 ++++++++++--------- .../tests/io/formats/style/test_to_latex.py | 8 +++---- pandas/tests/io/formats/test_format.py | 4 ++-- pandas/tests/io/formats/test_to_html.py | 10 ++++---- pandas/tests/io/test_html.py | 14 +++++------ pyproject.toml | 1 - 11 files changed, 52 insertions(+), 42 deletions(-) diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index c1f2e6ddb2388..4b3c98accfc73 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -132,7 +132,9 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None: n # error: Cannot determine type of 'names' for i, n in enumerate(self.names) # type: ignore[has-type] - if (i in usecols or n in usecols) + if ( + i in usecols or n in usecols + ) # pylint: disable=unsupported-membership-test ] # error: Cannot determine type of 'names' @@ -325,7 +327,10 @@ def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]: usecols = self._evaluate_usecols(self.usecols, names) if usecols is not None and len(names) != len(usecols): names = [ - name for i, name in enumerate(names) if i in usecols or name in usecols + name + for i, name in enumerate(names) + if i in usecols + or name in usecols # pylint: disable=unsupported-membership-test ] return names diff --git a/pandas/tests/extension/list/test_list.py b/pandas/tests/extension/list/test_list.py index 295f08679c3eb..c5f8e62dd3f21 100644 --- a/pandas/tests/extension/list/test_list.py +++ b/pandas/tests/extension/list/test_list.py @@ -30,4 +30,4 @@ def test_to_csv(data): # which was done in to_native_types df = pd.DataFrame({"a": data}) res = df.to_csv() - assert str(data[0]) in res + assert str(data[0]) in str(res) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 552b3753083fe..03d583f519cc0 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -751,7 +751,7 @@ class TestContains: def test_contains_top_level(self): midx = MultiIndex.from_product([["A", "B"], [1, 2]]) assert "A" in midx - assert "A" not in midx._engine + assert "A" not in midx._engine() def test_contains_with_nat(self): # MI with a NaT diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 0b114ea128b0b..ae134a0d9583a 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -177,12 +177,12 @@ def test_format_escape_html(escape, exp): s = Styler(df, uuid_len=0).format("&{0}&", escape=None) expected = f'&{chars}&' - assert expected in s.to_html() + assert expected in str(s.to_html()) # only the value should be escaped before passing to the formatter s = Styler(df, uuid_len=0).format("&{0}&", escape=escape) expected = f'&{exp}&' - assert expected in s.to_html() + assert expected in str(s.to_html()) # also test format_index() styler = Styler(DataFrame(columns=[chars]), uuid_len=0) @@ -195,7 +195,7 @@ def test_format_escape_html(escape, exp): def test_format_escape_na_rep(): # tests the na_rep is not escaped df = DataFrame([['<>&"', None]]) - s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&") + s = str(Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&")) ex = 'X&<>&">X' expected2 = '&' assert ex in s.to_html() @@ -429,7 +429,7 @@ def test_precision_zero(df): def test_formatter_options_validator(formatter, exp): df = DataFrame([[9]]) with option_context("styler.format.formatter", formatter): - assert f" {exp} " in df.style.to_latex() + assert f" {exp} " in str(df.style.to_latex()) def test_formatter_options_raises(): diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py index 43eb4cb0502a1..8c7582c64714c 100644 --- a/pandas/tests/io/formats/style/test_html.py +++ b/pandas/tests/io/formats/style/test_html.py @@ -131,14 +131,18 @@ def test_colspan_w3(): # GH 36223 df = DataFrame(data=[[1, 2]], columns=[["l0", "l0"], ["l1a", "l1b"]]) styler = Styler(df, uuid="_", cell_ids=False) - assert 'l0' in styler.to_html() + assert 'l0' in str( + styler.to_html() + ) def test_rowspan_w3(): # GH 38533 df = DataFrame(data=[[1, 2]], index=[["l0", "l0"], ["l1a", "l1b"]]) styler = Styler(df, uuid="_", cell_ids=False) - assert 'l0' in styler.to_html() + assert 'l0' in str( + styler.to_html() + ) def test_styles(styler): @@ -803,7 +807,7 @@ def test_rendered_links(type, text, exp, found): styler = df.style.format_index(hyperlinks="html") rendered = f'{found}' - result = styler.to_html() + result = str(styler.to_html()) assert (rendered in result) is exp assert (text in result) is not exp # test conversion done when expected and not @@ -812,7 +816,7 @@ def test_multiple_rendered_links(): links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e") # pylint: disable-next=consider-using-f-string df = DataFrame(["text {} {} text {} {}".format(*links)]) - result = df.style.format(hyperlinks="html").to_html() + result = str(df.style.format(hyperlinks="html").to_html()) href = '{0}' for link in links: assert href.format(link) in result diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 32ab0336aa93f..da9204624711d 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -256,7 +256,7 @@ def test_render_empty_mi(): """ ) - assert expected in df.style.to_html() + assert expected in str(df.style.to_html()) @pytest.mark.parametrize("comprehensive", [True, False]) @@ -505,7 +505,7 @@ def test_multiple_render(self, df): s.to_html() # do 2 renders to ensure css styles not duplicated assert ( '" in s.to_html() + " color: red;\n}\n" in str(s.to_html()) ) def test_render_empty_dfs(self): @@ -792,7 +792,7 @@ def test_init_with_na_rep(self): def test_caption(self, df): styler = Styler(df, caption="foo") - result = styler.to_html() + result = str(styler.to_html()) assert all(["caption" in result, "foo" in result]) styler = df.style @@ -802,7 +802,7 @@ def test_caption(self, df): def test_uuid(self, df): styler = Styler(df, uuid="abc123") - result = styler.to_html() + result = str(styler.to_html()) assert "abc123" in result styler = df.style @@ -813,7 +813,7 @@ def test_uuid(self, df): def test_unique_id(self): # See https://github.com/pandas-dev/pandas/issues/16780 df = DataFrame({"a": [1, 3, 5, 6], "b": [2, 4, 12, 21]}) - result = df.style.to_html(uuid="test") + result = str(df.style.to_html(uuid="test")) assert "test" in result ids = re.findall('id="(.*?)"', result) assert np.unique(ids).size == len(ids) @@ -880,7 +880,7 @@ def test_maybe_convert_css_to_tuples_err(self): def test_table_attributes(self, df): attributes = 'class="foo" data-bar' styler = Styler(df, table_attributes=attributes) - result = styler.to_html() + result = str(styler.to_html()) assert 'class="foo" data-bar' in result result = df.style.set_table_attributes(attributes).to_html() @@ -1258,13 +1258,15 @@ def test_no_cell_ids(self): def test_set_data_classes(self, classes): # GH 36159 df = DataFrame(data=[[0, 1], [2, 3]], columns=["A", "B"], index=["a", "b"]) - s = Styler(df, uuid_len=0, cell_ids=False).set_td_classes(classes).to_html() + s = str( + Styler(df, uuid_len=0, cell_ids=False).set_td_classes(classes).to_html() + ) assert '0' in s assert '1' in s assert '2' in s assert '3' in s # GH 39317 - s = Styler(df, uuid_len=0, cell_ids=True).set_td_classes(classes).to_html() + s = str(Styler(df, uuid_len=0, cell_ids=True).set_td_classes(classes).to_html()) assert '0' in s assert '1' in s assert '2' in s @@ -1280,7 +1282,7 @@ def test_set_data_classes_reindex(self): columns=[0, 2], index=[0, 2], ) - s = Styler(df, uuid_len=0).set_td_classes(classes).to_html() + s = str(Styler(df, uuid_len=0).set_td_classes(classes).to_html()) assert '0' in s assert '2' in s assert '4' in s @@ -1303,11 +1305,11 @@ def test_column_and_row_styling(self): df = DataFrame(data=[[0, 1], [1, 2]], columns=["A", "B"]) s = Styler(df, uuid_len=0) s = s.set_table_styles({"A": [{"selector": "", "props": [("color", "blue")]}]}) - assert "#T_ .col0 {\n color: blue;\n}" in s.to_html() + assert "#T_ .col0 {\n color: blue;\n}" in str(s.to_html()) s = s.set_table_styles( {0: [{"selector": "", "props": [("color", "blue")]}]}, axis=1 ) - assert "#T_ .row0 {\n color: blue;\n}" in s.to_html() + assert "#T_ .row0 {\n color: blue;\n}" in str(s.to_html()) @pytest.mark.parametrize("len_", [1, 5, 32, 33, 100]) def test_uuid_len(self, len_): diff --git a/pandas/tests/io/formats/style/test_to_latex.py b/pandas/tests/io/formats/style/test_to_latex.py index b295c955a8967..ee882a21a6876 100644 --- a/pandas/tests/io/formats/style/test_to_latex.py +++ b/pandas/tests/io/formats/style/test_to_latex.py @@ -358,7 +358,7 @@ def test_multi_options(df_ext): def test_multiindex_columns_hidden(): df = DataFrame([[1, 2, 3, 4]]) df.columns = MultiIndex.from_tuples([("A", 1), ("A", 2), ("A", 3), ("B", 1)]) - s = df.style + s = str(df.style) assert "{tabular}{lrrrr}" in s.to_latex() s.set_table_styles([]) # reset the position command s.hide([("A", 2)], axis="columns") @@ -851,7 +851,7 @@ def test_rendered_links(): # note the majority of testing is done in test_html.py: test_rendered_links # these test only the alternative latex format is functional df = DataFrame(["text www.domain.com text"]) - result = df.style.format(hyperlinks="latex").to_latex() + result = str(df.style.format(hyperlinks="latex").to_latex()) assert r"text \href{www.domain.com}{www.domain.com} text" in result @@ -898,7 +898,7 @@ def test_clines_validation(clines, styler): @pytest.mark.parametrize("env", ["table", "longtable"]) def test_clines_index(clines, exp, env): df = DataFrame([[1], [2], [3], [4]]) - result = df.style.to_latex(clines=clines, environment=env) + result = str(df.style.to_latex(clines=clines, environment=env)) expected = f"""\ 0 & 1 \\\\{exp} 1 & 2 \\\\{exp} @@ -988,7 +988,7 @@ def test_clines_multiindex(clines, expected, env): styler = df.style styler.hide([("-", 0, "X"), ("-", 0, "Y")]) styler.hide(level=1) - result = styler.to_latex(clines=clines, environment=env) + result = str(styler.to_latex(clines=clines, environment=env)) assert expected in result diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 28590f040b8da..191b199774f5d 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2041,7 +2041,7 @@ def test_repr_float_format_in_object_col(self, float_format, expected): def test_dict_entries(self): df = DataFrame({"A": [{"a": 1, "b": 2}]}) - val = df.to_string() + val = str(df.to_string()) assert "'a': 1" in val assert "'b': 2" in val @@ -2418,7 +2418,7 @@ def test_datetimeindex(self): index = date_range("20130102", periods=6) s = Series(1, index=index) - result = s.to_string() + result = str(s.to_string()) assert "2013-01-02" in result # nat in index diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 3059efef09095..971810b876826 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -710,7 +710,7 @@ def test_to_html_truncation_index_false_max_cols( @pytest.mark.parametrize("notebook", [True, False]) def test_to_html_notebook_has_style(notebook): df = DataFrame({"A": [1, 2, 3]}) - result = df.to_html(notebook=notebook) + result = str(df.to_html(notebook=notebook)) if notebook: assert "tbody tr th:only-of-type" in result @@ -725,14 +725,14 @@ def test_to_html_notebook_has_style(notebook): def test_to_html_with_index_names_false(): # GH 16493 df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname")) - result = df.to_html(index_names=False) + result = str(df.to_html(index_names=False)) assert "myindexname" not in result def test_to_html_with_id(): # GH 8496 df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname")) - result = df.to_html(index_names=False, table_id="TEST_ID") + result = str(df.to_html(index_names=False, table_id="TEST_ID")) assert ' id="TEST_ID"' in result @@ -799,8 +799,8 @@ def test_to_html_round_column_headers(): # GH 17280 df = DataFrame([1], columns=[0.55555]) with option_context("display.precision", 3): - html = df.to_html(notebook=False) - notebook = df.to_html(notebook=True) + html = str(df.to_html(notebook=False)) + notebook = str(df.to_html(notebook=True)) assert "0.55555" in html assert "0.556" in notebook diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index daa2dffeaa143..cb511755f9ac9 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1121,18 +1121,18 @@ def test_to_html_timestamp(self): rng = date_range("2000-01-01", periods=10) df = DataFrame(np.random.randn(10, 4), index=rng) - result = df.to_html() + result = str(df.to_html()) assert "2000-01-01" in result def test_to_html_borderless(self): df = DataFrame([{"A": 1, "B": 2}]) - out_border_default = df.to_html() - out_border_true = df.to_html(border=True) - out_border_explicit_default = df.to_html(border=1) - out_border_nondefault = df.to_html(border=2) - out_border_zero = df.to_html(border=0) + out_border_default = str(df.to_html()) + out_border_true = str(df.to_html(border=True)) + out_border_explicit_default = str(df.to_html(border=1)) + out_border_nondefault = str(df.to_html(border=2)) + out_border_zero = str(df.to_html(border=0)) - out_border_false = df.to_html(border=False) + out_border_false = str(df.to_html(border=False)) assert ' border="1"' in out_border_default assert out_border_true == out_border_default diff --git a/pyproject.toml b/pyproject.toml index 8ae2d4abb29ee..8a3085d40285e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,6 @@ disable = [ "unpacking-non-sequence", "unsubscriptable-object", "unsupported-assignment-operation", - "unsupported-membership-test", # pylint type "C": convention, for programming standard violation "import-outside-toplevel", From be8c23a23e141787fc057b70aae4957407270400 Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Tue, 22 Nov 2022 15:42:39 -0800 Subject: [PATCH 2/6] fix: fixing some issues with pylint fix --- pandas/io/parsers/c_parser_wrapper.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 4b3c98accfc73..16d1988b09839 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -132,9 +132,8 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None: n # error: Cannot determine type of 'names' for i, n in enumerate(self.names) # type: ignore[has-type] - if ( - i in usecols or n in usecols - ) # pylint: disable=unsupported-membership-test + # pylint: disable=unsupported-membership-test + if (i in usecols or n in usecols) ] # error: Cannot determine type of 'names' @@ -329,8 +328,8 @@ def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]: names = [ name for i, name in enumerate(names) - if i in usecols - or name in usecols # pylint: disable=unsupported-membership-test + # pylint: disable=unsupported-membership-test + if i in usecols or name in usecols ] return names From 102f91540df51d1f431b306795087d4ce10a5a9d Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Tue, 22 Nov 2022 15:56:37 -0800 Subject: [PATCH 3/6] fix: fixing formatting of a line in c_parser_wrapper.py --- pandas/io/parsers/c_parser_wrapper.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 16d1988b09839..333e541c0b3ab 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -323,13 +323,11 @@ def read( def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]: # hackish + # pylint: disable=unsupported-membership-test usecols = self._evaluate_usecols(self.usecols, names) if usecols is not None and len(names) != len(usecols): names = [ - name - for i, name in enumerate(names) - # pylint: disable=unsupported-membership-test - if i in usecols or name in usecols + name for i, name in enumerate(names) if i in usecols or name in usecols ] return names From 4414a04a613ed582fca9d40e66cd2a50105f286e Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Tue, 22 Nov 2022 17:30:01 -0800 Subject: [PATCH 4/6] fix: fixing a mistake in my changes --- pandas/tests/indexes/multi/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 03d583f519cc0..6735b2f205a31 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -751,7 +751,7 @@ class TestContains: def test_contains_top_level(self): midx = MultiIndex.from_product([["A", "B"], [1, 2]]) assert "A" in midx - assert "A" not in midx._engine() + assert "A" not in list(midx._engine) def test_contains_with_nat(self): # MI with a NaT From 84a1f15f2a495813f85ac4180d521ceacb729496 Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Tue, 22 Nov 2022 17:34:18 -0800 Subject: [PATCH 5/6] fix: fixing some failures I had introduced --- pandas/tests/io/formats/style/test_format.py | 6 +++--- pandas/tests/io/formats/style/test_to_latex.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index ae134a0d9583a..80428cd9a0f3f 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -195,11 +195,11 @@ def test_format_escape_html(escape, exp): def test_format_escape_na_rep(): # tests the na_rep is not escaped df = DataFrame([['<>&"', None]]) - s = str(Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&")) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&") ex = 'X&<>&">X' expected2 = '&' - assert ex in s.to_html() - assert expected2 in s.to_html() + assert ex in str(s.to_html()) + assert expected2 in str(s.to_html()) # also test for format_index() df = DataFrame(columns=['<>&"', None]) diff --git a/pandas/tests/io/formats/style/test_to_latex.py b/pandas/tests/io/formats/style/test_to_latex.py index ee882a21a6876..b4986e74a0641 100644 --- a/pandas/tests/io/formats/style/test_to_latex.py +++ b/pandas/tests/io/formats/style/test_to_latex.py @@ -358,11 +358,11 @@ def test_multi_options(df_ext): def test_multiindex_columns_hidden(): df = DataFrame([[1, 2, 3, 4]]) df.columns = MultiIndex.from_tuples([("A", 1), ("A", 2), ("A", 3), ("B", 1)]) - s = str(df.style) - assert "{tabular}{lrrrr}" in s.to_latex() + s = df.style + assert "{tabular}{lrrrr}" in str(s.to_latex()) s.set_table_styles([]) # reset the position command s.hide([("A", 2)], axis="columns") - assert "{tabular}{lrrr}" in s.to_latex() + assert "{tabular}{lrrr}" in str(s.to_latex()) @pytest.mark.parametrize( From 5d01dee28c77194584338f6ca7f14aa674281e4a Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Wed, 23 Nov 2022 14:30:59 -0800 Subject: [PATCH 6/6] test: fixing test failure for midx._engine --- pandas/tests/indexes/multi/test_indexing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 6735b2f205a31..e1b534958cf12 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -749,9 +749,10 @@ def test_where_array_like(self, listlike_box): class TestContains: def test_contains_top_level(self): + # pylint: disable=unsupported-membership-test midx = MultiIndex.from_product([["A", "B"], [1, 2]]) assert "A" in midx - assert "A" not in list(midx._engine) + assert "A" not in midx._engine def test_contains_with_nat(self): # MI with a NaT