From 16a332827a6d2e815e37b3fcdd514d1ae8ca05f9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 19 Dec 2019 16:36:58 +0000 Subject: [PATCH 01/24] :sparkles: Add to_markdown method --- doc/source/whatsnew/v1.0.0.rst | 1 + environment.yml | 1 + pandas/core/frame.py | 24 +++++++++++++++++++++ pandas/tests/io/formats/test_to_markdown.py | 12 +++++++++++ pandas/util/_test_decorators.py | 7 ++++++ requirements-dev.txt | 1 + 6 files changed, 46 insertions(+) create mode 100644 pandas/tests/io/formats/test_to_markdown.py diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a52c9371a79f1..f769689250d77 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -208,6 +208,7 @@ Other enhancements - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`) - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) - :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`) +- :meth:`DataFrame.to_markdown` added (:issue:`11052`) Build Changes diff --git a/environment.yml b/environment.yml index f930458d0a855..2a7a6640428c3 100644 --- a/environment.yml +++ b/environment.yml @@ -75,6 +75,7 @@ dependencies: - matplotlib>=2.2.2 # pandas.plotting, Series.plot, DataFrame.plot - numexpr>=2.6.8 - scipy>=1.1 + - tabulate # optional for io # --------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9ec646a100a1a..449a1ee35a5a6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -37,6 +37,7 @@ from pandas._libs import algos as libalgos, lib from pandas._typing import Axes, Dtype, FilePathOrBuffer +from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import ( Appender, @@ -1964,6 +1965,29 @@ def to_feather(self, path): to_feather(self, path) + def to_markdown(self): + """ + Print a DataFrame in markdown-friendly format. + + .. versionadded:: 1.0 + + Returns + ------- + str + DataFrame in markdown-friendly format. + + Examples + -------- + >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]}) + >>> print(df.to_markdown()) + | | col1 | col2 | + |---:|-------:|-------:| + | 0 | 1 | 3 | + | 1 | 2 | 4 | + """ + tabulate = import_optional_dependency("tabulate") + return self.pipe(tabulate.tabulate, headers="keys", tablefmt="pipe") + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_parquet( self, diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py new file mode 100644 index 0000000000000..bbbc5955956c7 --- /dev/null +++ b/pandas/tests/io/formats/test_to_markdown.py @@ -0,0 +1,12 @@ +import pandas.util._test_decorators as td + +import pandas as pd + + +@td.skip_if_no_tabulate +def test_to_markdown(): + df = pd.DataFrame([1, 2, 3]) + result = df.to_markdown() + assert ( + result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index a280da6e239b2..fb7f881bdcdc2 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -119,6 +119,10 @@ def _skip_if_no_scipy() -> bool: ) +def _skip_if_no_tabulate(): + return not safe_import("tabulate") + + def skip_if_installed(package: str) -> Callable: """ Skip a test if a package is installed. @@ -193,6 +197,9 @@ def skip_if_no(package: str, min_version: Optional[str] = None) -> Callable: not _USE_NUMEXPR, reason=f"numexpr enabled->{_USE_NUMEXPR}, installed->{_NUMEXPR_INSTALLED}", ) +skip_if_no_tabulate = pytest.mark.skipif( + _skip_if_no_tabulate(), reason="Missing tabulate requirement" +) def skip_if_np_lt( diff --git a/requirements-dev.txt b/requirements-dev.txt index 827bb809d46e4..be986fc17465f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -50,6 +50,7 @@ jinja2 matplotlib>=2.2.2 numexpr>=2.6.8 scipy>=1.1 +tabulate beautifulsoup4>=4.6.0 html5lib lxml From 8eb96ecbca8e74f7ffb1a941a0e306aa33ca706a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 10:03:05 +0000 Subject: [PATCH 02/24] :pushpin: put tabulate in #optional for io, pin dependency --- environment.yml | 2 +- requirements-dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 2a7a6640428c3..b1e5bff872e9f 100644 --- a/environment.yml +++ b/environment.yml @@ -75,7 +75,6 @@ dependencies: - matplotlib>=2.2.2 # pandas.plotting, Series.plot, DataFrame.plot - numexpr>=2.6.8 - scipy>=1.1 - - tabulate # optional for io # --------------- @@ -101,5 +100,6 @@ dependencies: - sqlalchemy # pandas.read_sql, DataFrame.to_sql - xarray # DataFrame.to_xarray - pyreadstat # pandas.read_spss + - tabulate>=0.8.6 # DataFrame.to_markdown - pip: - git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master diff --git a/requirements-dev.txt b/requirements-dev.txt index be986fc17465f..65348b20fc555 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -50,7 +50,6 @@ jinja2 matplotlib>=2.2.2 numexpr>=2.6.8 scipy>=1.1 -tabulate beautifulsoup4>=4.6.0 html5lib lxml @@ -68,4 +67,5 @@ s3fs sqlalchemy xarray pyreadstat +tabulate>=0.8.6 git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master \ No newline at end of file From 00fd8a494eb54f95fe9b5f51cd198f0172c9bcf1 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 10:04:58 +0000 Subject: [PATCH 03/24] :recycle: remove call to DataFrame.pipe in DataFrame.to_markdown --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 449a1ee35a5a6..f88a180299f57 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1965,7 +1965,7 @@ def to_feather(self, path): to_feather(self, path) - def to_markdown(self): + def to_markdown(self, **kwargs): """ Print a DataFrame in markdown-friendly format. @@ -1986,7 +1986,7 @@ def to_markdown(self): | 1 | 2 | 4 | """ tabulate = import_optional_dependency("tabulate") - return self.pipe(tabulate.tabulate, headers="keys", tablefmt="pipe") + return tabulate.tabulate(self, headers="keys", tablefmt="pipe", **kwargs) @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_parquet( From 65e9f1b789b3fbf80e1293b488c2d4918e47c2e3 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 10:28:56 +0000 Subject: [PATCH 04/24] :pushpin: add tabulate to travis-38.yaml --- ci/deps/travis-38.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/deps/travis-38.yaml b/ci/deps/travis-38.yaml index 828f02596a70e..bab7003e73f28 100644 --- a/ci/deps/travis-38.yaml +++ b/ci/deps/travis-38.yaml @@ -17,3 +17,4 @@ dependencies: - nomkl - pytz - pip + - tabulate==0.8.6 From a2735602e90cc112f0f3a22c9bf0780c1e7a1117 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 10:29:35 +0000 Subject: [PATCH 05/24] :pencil: add DataFrame.to_markdown to API reference file --- doc/source/reference/frame.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 815f3f9c19d49..4c9df35ea8d9d 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -361,4 +361,5 @@ Serialization / IO / conversion DataFrame.to_records DataFrame.to_string DataFrame.to_clipboard + DataFrame.to_markdown DataFrame.style From 14e36e88d72c7a7a954ff981fb56cb18e5f8b92b Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 10:39:20 +0000 Subject: [PATCH 06/24] :sparkles: add **kwargs to DataFrame.to_markdown --- pandas/core/frame.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f88a180299f57..ceac56fb6822f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1986,7 +1986,11 @@ def to_markdown(self, **kwargs): | 1 | 2 | 4 | """ tabulate = import_optional_dependency("tabulate") - return tabulate.tabulate(self, headers="keys", tablefmt="pipe", **kwargs) + if "headers" not in kwargs: + kwargs["headers"] = "keys" + if "tablefmt" not in kwargs: + kwargs["tablefmt"] = "pipe" + return tabulate.tabulate(self, **kwargs) @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_parquet( From ee07c6832c886fff9786ee56d95cfe8036c96ef0 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 10:40:12 +0000 Subject: [PATCH 07/24] :white_check_mark: update tests so they work with **kwargs, set skip_if_no_tabulate to whole class --- pandas/tests/io/formats/test_to_markdown.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py index bbbc5955956c7..1107c43b40ccc 100644 --- a/pandas/tests/io/formats/test_to_markdown.py +++ b/pandas/tests/io/formats/test_to_markdown.py @@ -4,9 +4,16 @@ @td.skip_if_no_tabulate -def test_to_markdown(): - df = pd.DataFrame([1, 2, 3]) - result = df.to_markdown() - assert ( - result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" - ) +class TestToMarkdown: + def test_to_markdown(self): + df = pd.DataFrame([1, 2, 3]) + result = df.to_markdown() + assert ( + result + == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) + + def test_to_markdown_other_tablefmt(self): + df = pd.DataFrame([1, 2, 3]) + result = df.to_markdown(tablefmt="jira") + assert result == "|| || 0 ||\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" From d99a54f3bc08e5f5faa8643119cee2ce6c20bfb2 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 12:53:33 +0000 Subject: [PATCH 08/24] :sparkles: add to_markdown to Series --- pandas/core/series.py | 22 +++++++++++++++++++++ pandas/tests/io/formats/test_to_markdown.py | 20 +++++++++++++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b52ab4c248498..5eef033ea9467 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1439,6 +1439,28 @@ def to_string( with open(buf, "w") as f: f.write(result) + def to_markdown(self, **kwargs): + """ + Print a Series in markdown-friendly format. + + .. versionadded:: 1.0 + + Returns + ------- + str + Series in markdown-friendly format. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> print(s.to_markdown()) + | | col1 | col2 | + |---:|-------:|-------:| + | 0 | 1 | 3 | + | 1 | 2 | 4 | + """ + return self.to_frame().to_markdown(**kwargs) + # ---------------------------------------------------------------------- def items(self): diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py index 1107c43b40ccc..b65d524567af2 100644 --- a/pandas/tests/io/formats/test_to_markdown.py +++ b/pandas/tests/io/formats/test_to_markdown.py @@ -5,7 +5,7 @@ @td.skip_if_no_tabulate class TestToMarkdown: - def test_to_markdown(self): + def test_simple(self): df = pd.DataFrame([1, 2, 3]) result = df.to_markdown() assert ( @@ -13,7 +13,23 @@ def test_to_markdown(self): == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" ) - def test_to_markdown_other_tablefmt(self): + def test_other_tablefmt(self): df = pd.DataFrame([1, 2, 3]) result = df.to_markdown(tablefmt="jira") assert result == "|| || 0 ||\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + + def test_other_headers(self): + df = pd.DataFrame([1, 2, 3]) + result = df.to_markdown(headers=["foo", "bar"]) + assert ( + result + == "| foo | bar |\n|------:|------:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) + + def test_series(self): + s = pd.Series([1, 2, 3], name="foo") + result = s.to_markdown() + assert ( + result + == "| | foo |\n|---:|------:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) From 57dfb7bf5347286b976d1700c1fdeaccb46a59d6 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 13:07:05 +0000 Subject: [PATCH 09/24] :pencil: document to_markdown in Series API reference --- doc/source/reference/series.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 6e1ee303135d8..0639730e2dcde 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -578,3 +578,4 @@ Serialization / IO / conversion Series.to_string Series.to_clipboard Series.to_latex + Series.to_markdown From ccb132bd5c688b3377921e1117710fcc9b84e17b Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 13:08:10 +0000 Subject: [PATCH 10/24] :white_check_mark: update tests so they test Series.to_markdown as well --- pandas/tests/io/formats/test_to_markdown.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py index b65d524567af2..ac6111f98b726 100644 --- a/pandas/tests/io/formats/test_to_markdown.py +++ b/pandas/tests/io/formats/test_to_markdown.py @@ -21,15 +21,15 @@ def test_other_tablefmt(self): def test_other_headers(self): df = pd.DataFrame([1, 2, 3]) result = df.to_markdown(headers=["foo", "bar"]) - assert ( - result - == "| foo | bar |\n|------:|------:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + assert result == ( + "| foo | bar |\n|------:|------:|\n| 0 " + "| 1 |\n| 1 | 2 |\n| 2 | 3 |" ) def test_series(self): s = pd.Series([1, 2, 3], name="foo") result = s.to_markdown() - assert ( - result - == "| | foo |\n|---:|------:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + assert result == ( + "| | foo |\n|---:|------:|\n| 0 | 1 " + "|\n| 1 | 2 |\n| 2 | 3 |" ) From bac632e2b17138cd28fdd6ab06a7cb6ae6d3f7ad Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 13:43:07 +0000 Subject: [PATCH 11/24] :arrow_down: Set tabulate dependency at 0.8, before which tests fail --- ci/deps/travis-38.yaml | 2 +- environment.yml | 2 +- requirements-dev.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/deps/travis-38.yaml b/ci/deps/travis-38.yaml index bab7003e73f28..a544e29193612 100644 --- a/ci/deps/travis-38.yaml +++ b/ci/deps/travis-38.yaml @@ -17,4 +17,4 @@ dependencies: - nomkl - pytz - pip - - tabulate==0.8.6 + - tabulate==0.8 diff --git a/environment.yml b/environment.yml index b1e5bff872e9f..503e4914f5e6d 100644 --- a/environment.yml +++ b/environment.yml @@ -100,6 +100,6 @@ dependencies: - sqlalchemy # pandas.read_sql, DataFrame.to_sql - xarray # DataFrame.to_xarray - pyreadstat # pandas.read_spss - - tabulate>=0.8.6 # DataFrame.to_markdown + - tabulate>=0.8 # DataFrame.to_markdown - pip: - git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master diff --git a/requirements-dev.txt b/requirements-dev.txt index 65348b20fc555..ba37981c74530 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -67,5 +67,5 @@ s3fs sqlalchemy xarray pyreadstat -tabulate>=0.8.6 +tabulate>=0.8 git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master \ No newline at end of file From 557e6dd90e88f2dc14044c843cb931b7181c4196 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 13:43:23 +0000 Subject: [PATCH 12/24] :pencil: update failing docstring --- pandas/core/series.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 5eef033ea9467..2995b3c1d79cc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1454,10 +1454,12 @@ def to_markdown(self, **kwargs): -------- >>> s = pd.Series([1, 2, 3, 4]) >>> print(s.to_markdown()) - | | col1 | col2 | - |---:|-------:|-------:| - | 0 | 1 | 3 | - | 1 | 2 | 4 | + | | 0 | + |---:|----:| + | 0 | 1 | + | 1 | 2 | + | 2 | 3 | + | 3 | 4 | """ return self.to_frame().to_markdown(**kwargs) From 01260f2bea52f6eeca275b3f256b306643e27218 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 15:18:48 +0000 Subject: [PATCH 13/24] :pushpin: set tabulate dependency at 0.8.0, not 0.8 --- ci/deps/travis-38.yaml | 2 +- environment.yml | 2 +- requirements-dev.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/deps/travis-38.yaml b/ci/deps/travis-38.yaml index a544e29193612..d6cc670d5c7c5 100644 --- a/ci/deps/travis-38.yaml +++ b/ci/deps/travis-38.yaml @@ -17,4 +17,4 @@ dependencies: - nomkl - pytz - pip - - tabulate==0.8 + - tabulate==0.8.0 diff --git a/environment.yml b/environment.yml index 503e4914f5e6d..40a2aaf332328 100644 --- a/environment.yml +++ b/environment.yml @@ -100,6 +100,6 @@ dependencies: - sqlalchemy # pandas.read_sql, DataFrame.to_sql - xarray # DataFrame.to_xarray - pyreadstat # pandas.read_spss - - tabulate>=0.8 # DataFrame.to_markdown + - tabulate>=0.8.0 # DataFrame.to_markdown - pip: - git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master diff --git a/requirements-dev.txt b/requirements-dev.txt index ba37981c74530..ad16f3c646bd8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -67,5 +67,5 @@ s3fs sqlalchemy xarray pyreadstat -tabulate>=0.8 +tabulate>=0.8.0 git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master \ No newline at end of file From b32d54dc0544f0922527ff0f0ceb2077f11dd319 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 15:19:40 +0000 Subject: [PATCH 14/24] :sparkles: add buf and mode arguments to to_markdown --- pandas/core/frame.py | 20 ++++++++++++++------ pandas/core/series.py | 10 ++++++++-- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ceac56fb6822f..b8f0862781b9f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -119,6 +119,7 @@ from pandas.core.ops.missing import dispatch_fill_zeros from pandas.core.series import Series +from pandas.io.common import get_filepath_or_buffer from pandas.io.formats import console, format as fmt from pandas.io.formats.printing import pprint_thing import pandas.plotting @@ -1965,7 +1966,12 @@ def to_feather(self, path): to_feather(self, path) - def to_markdown(self, **kwargs): + def to_markdown( + self, + buf: Optional[FilePathOrBuffer[str]] = None, + mode: Optional[str] = None, + **kwargs, + ) -> None: """ Print a DataFrame in markdown-friendly format. @@ -1985,12 +1991,14 @@ def to_markdown(self, **kwargs): | 0 | 1 | 3 | | 1 | 2 | 4 | """ + if buf is None: + buf = sys.stdout + + buf, _, _, _ = get_filepath_or_buffer(buf, mode=mode) tabulate = import_optional_dependency("tabulate") - if "headers" not in kwargs: - kwargs["headers"] = "keys" - if "tablefmt" not in kwargs: - kwargs["tablefmt"] = "pipe" - return tabulate.tabulate(self, **kwargs) + kwargs.setdefault("headers", "keys") + kwargs.setdefault("tablefmt", "pipe") + buf.writelines(tabulate.tabulate(self, **kwargs)) @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_parquet( diff --git a/pandas/core/series.py b/pandas/core/series.py index 2995b3c1d79cc..8ac0d89f2068e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -47,6 +47,7 @@ ) import pandas as pd +from pandas._typing import FilePathOrBuffer from pandas.core import algorithms, base, generic, nanops, ops from pandas.core.accessor import CachedAccessor from pandas.core.arrays import ExtensionArray, try_cast_to_ea @@ -1439,7 +1440,12 @@ def to_string( with open(buf, "w") as f: f.write(result) - def to_markdown(self, **kwargs): + def to_markdown( + self, + buf: Optional[FilePathOrBuffer[str]] = None, + mode: Optional[str] = None, + **kwargs, + ) -> str: """ Print a Series in markdown-friendly format. @@ -1461,7 +1467,7 @@ def to_markdown(self, **kwargs): | 2 | 3 | | 3 | 4 | """ - return self.to_frame().to_markdown(**kwargs) + return self.to_frame().to_markdown(buf, mode, **kwargs) # ---------------------------------------------------------------------- From 68e84d6dc56c261d8b17f8b72a3124b36da03311 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 15:20:04 +0000 Subject: [PATCH 15/24] :white_check_mark: update tests so they use buf --- pandas/tests/io/formats/test_to_markdown.py | 81 +++++++++++++-------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py index ac6111f98b726..3f2cefa46d01f 100644 --- a/pandas/tests/io/formats/test_to_markdown.py +++ b/pandas/tests/io/formats/test_to_markdown.py @@ -1,35 +1,54 @@ -import pandas.util._test_decorators as td +from io import StringIO + +import pytest import pandas as pd +pytest.importorskip("tabulate") + + +def test_simple(): + buf = StringIO() + df = pd.DataFrame([1, 2, 3]) + df.to_markdown(buf=buf) + result = buf.getvalue() + assert ( + result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) + + +def test_other_tablefmt(): + buf = StringIO() + df = pd.DataFrame([1, 2, 3]) + df.to_markdown(buf=buf, tablefmt="jira") + result = buf.getvalue() + assert result == "|| || 0 ||\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + + +def test_other_headers(): + buf = StringIO() + df = pd.DataFrame([1, 2, 3]) + df.to_markdown(buf=buf, headers=["foo", "bar"]) + result = buf.getvalue() + assert result == ( + "| foo | bar |\n|------:|------:|\n| 0 " + "| 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) + + +def test_series(): + buf = StringIO() + s = pd.Series([1, 2, 3], name="foo") + s.to_markdown(buf=buf) + result = buf.getvalue() + assert result == ( + "| | foo |\n|---:|------:|\n| 0 | 1 " + "|\n| 1 | 2 |\n| 2 | 3 |" + ) + -@td.skip_if_no_tabulate -class TestToMarkdown: - def test_simple(self): - df = pd.DataFrame([1, 2, 3]) - result = df.to_markdown() - assert ( - result - == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" - ) - - def test_other_tablefmt(self): - df = pd.DataFrame([1, 2, 3]) - result = df.to_markdown(tablefmt="jira") - assert result == "|| || 0 ||\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" - - def test_other_headers(self): - df = pd.DataFrame([1, 2, 3]) - result = df.to_markdown(headers=["foo", "bar"]) - assert result == ( - "| foo | bar |\n|------:|------:|\n| 0 " - "| 1 |\n| 1 | 2 |\n| 2 | 3 |" - ) - - def test_series(self): - s = pd.Series([1, 2, 3], name="foo") - result = s.to_markdown() - assert result == ( - "| | foo |\n|---:|------:|\n| 0 | 1 " - "|\n| 1 | 2 |\n| 2 | 3 |" - ) +def test_no_buf(capsys): + df = pd.DataFrame([1, 2, 3]) + df.to_markdown() + out, _ = capsys.readouterr() + assert out == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" From 882768b273ac6a29a49f3e5fa17bd1971797fd4b Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 15:37:36 +0000 Subject: [PATCH 16/24] :fire: remove skip_if_no_tabulate, due to module-level fixture --- pandas/util/_test_decorators.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index fb7f881bdcdc2..a280da6e239b2 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -119,10 +119,6 @@ def _skip_if_no_scipy() -> bool: ) -def _skip_if_no_tabulate(): - return not safe_import("tabulate") - - def skip_if_installed(package: str) -> Callable: """ Skip a test if a package is installed. @@ -197,9 +193,6 @@ def skip_if_no(package: str, min_version: Optional[str] = None) -> Callable: not _USE_NUMEXPR, reason=f"numexpr enabled->{_USE_NUMEXPR}, installed->{_NUMEXPR_INSTALLED}", ) -skip_if_no_tabulate = pytest.mark.skipif( - _skip_if_no_tabulate(), reason="Missing tabulate requirement" -) def skip_if_np_lt( From f46edb1b3f719a5e06e38fad6a9dff3f672e5074 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 15:59:18 +0000 Subject: [PATCH 17/24] :pencil: add tabulate to install.rst and _optional, capitalise Markdown --- doc/source/getting_started/install.rst | 1 + pandas/compat/_optional.py | 1 + pandas/core/frame.py | 4 ++-- pandas/core/series.py | 4 ++-- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 62a39fb5176f9..188e35de67f20 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -264,6 +264,7 @@ pyreadstat SPSS files (.sav) reading pytables 3.4.2 HDF5 reading / writing qtpy Clipboard I/O s3fs 0.3.0 Amazon S3 access +tabulate 0.8.0 Printing DataFrames and Series in Markdown-friendly format xarray 0.8.2 pandas-like API for N-dimensional data xclip Clipboard I/O on linux xlrd 1.1.0 Excel reading diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 412293f029fa5..9ab298c6274e3 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -23,6 +23,7 @@ "scipy": "0.19.0", "sqlalchemy": "1.1.4", "tables": "3.4.2", + "tabulate": "0.8.0", "xarray": "0.8.2", "xlrd": "1.1.0", "xlwt": "1.2.0", diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b8f0862781b9f..6e170ec298e3d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1973,14 +1973,14 @@ def to_markdown( **kwargs, ) -> None: """ - Print a DataFrame in markdown-friendly format. + Print a DataFrame in Markdown-friendly format. .. versionadded:: 1.0 Returns ------- str - DataFrame in markdown-friendly format. + DataFrame in Markdown-friendly format. Examples -------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 8ac0d89f2068e..9ad5d156e4fe2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1447,14 +1447,14 @@ def to_markdown( **kwargs, ) -> str: """ - Print a Series in markdown-friendly format. + Print a Series in Markdown-friendly format. .. versionadded:: 1.0 Returns ------- str - Series in markdown-friendly format. + Series in Markdown-friendly format. Examples -------- From 32d8762c0e96f505bb0d1eaf9ead7973385cb429 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 20 Dec 2019 16:01:18 +0000 Subject: [PATCH 18/24] :push_pin: add tabulate unpinned to travis-37.yaml --- ci/deps/travis-37.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/deps/travis-37.yaml b/ci/deps/travis-37.yaml index 6826a9d072ff3..73e2c20b31438 100644 --- a/ci/deps/travis-37.yaml +++ b/ci/deps/travis-37.yaml @@ -20,6 +20,7 @@ dependencies: - pyarrow - pytz - s3fs + - tabulate - pyreadstat - pip - pip: From c5c3768e35c2320ea9ba681afee424d2d9a68e5f Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 21 Dec 2019 08:21:35 +0000 Subject: [PATCH 19/24] :bug: dont all get_filepath_or_buffer with buf equal to sys.stdout --- pandas/core/frame.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6e170ec298e3d..0b18153d5fb1c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1991,14 +1991,12 @@ def to_markdown( | 0 | 1 | 3 | | 1 | 2 | 4 | """ + tabulate = import_optional_dependency("tabulate") + result = tabulate.tabulate(self, **kwargs) if buf is None: - buf = sys.stdout - + return result buf, _, _, _ = get_filepath_or_buffer(buf, mode=mode) - tabulate = import_optional_dependency("tabulate") - kwargs.setdefault("headers", "keys") - kwargs.setdefault("tablefmt", "pipe") - buf.writelines(tabulate.tabulate(self, **kwargs)) + buf.writelines(result) @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_parquet( From df7880c0bc878ea4dae61ddb650c0b2d04336500 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 21 Dec 2019 17:05:46 +0000 Subject: [PATCH 20/24] :ok_hand: return string if buf is set to None, use shared doc, change version introduced in to 1.0.0 --- pandas/core/frame.py | 39 ++++++++++---------- pandas/core/generic.py | 21 +++++++++++ pandas/core/series.py | 40 +++++++++------------ pandas/tests/io/formats/test_to_markdown.py | 7 ++-- 4 files changed, 60 insertions(+), 47 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0b18153d5fb1c..d8ea437afd792 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -15,6 +15,7 @@ import sys from textwrap import dedent from typing import ( + IO, Any, FrozenSet, Hashable, @@ -1966,37 +1967,35 @@ def to_feather(self, path): to_feather(self, path) - def to_markdown( - self, - buf: Optional[FilePathOrBuffer[str]] = None, - mode: Optional[str] = None, - **kwargs, - ) -> None: + @Appender( """ - Print a DataFrame in Markdown-friendly format. - - .. versionadded:: 1.0 - - Returns - ------- - str - DataFrame in Markdown-friendly format. - Examples -------- - >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]}) + >>> df = pd.DataFrame( + ... data={"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]} + ... ) >>> print(df.to_markdown()) - | | col1 | col2 | - |---:|-------:|-------:| - | 0 | 1 | 3 | - | 1 | 2 | 4 | + | | animal_1 | animal_2 | + |---:|:-----------|:-----------| + | 0 | elk | dog | + | 1 | pig | quetzal | """ + ) + @Substitution(klass="DataFrame") + @Appender(_shared_docs["to_markdown"]) + def to_markdown( + self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs, + ) -> Optional[str]: + kwargs.setdefault("headers", "keys") + kwargs.setdefault("tablefmt", "pipe") tabulate = import_optional_dependency("tabulate") result = tabulate.tabulate(self, **kwargs) if buf is None: return result buf, _, _, _ = get_filepath_or_buffer(buf, mode=mode) + assert buf is not None # Help mypy. buf.writelines(result) + return None @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_parquet( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 06a38448843f4..9f895c2a41d76 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1970,6 +1970,27 @@ def _repr_data_resource_(self): # ---------------------------------------------------------------------- # I/O Methods + _shared_docs[ + "to_markdown" + ] = """ + Print %(klass)s in Markdown-friendly format. + + .. versionadded:: 1.0.0 + + Parameters + ---------- + buf : writable buffer, defaults to sys.stdout + Where to send the output. By default, the output is printed to + sys.stdout. Pass a writable buffer if you need to further process + the output. + mode : str, optional + + Returns + ------- + str + %(klass)s in Markdown-friendly format. + """ + _shared_docs[ "to_excel" ] = """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 9ad5d156e4fe2..14826e0a1d5a4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4,7 +4,7 @@ from io import StringIO from shutil import get_terminal_size from textwrap import dedent -from typing import Any, Callable, Hashable, List, Optional +from typing import IO, Any, Callable, Hashable, List, Optional import warnings import numpy as np @@ -47,7 +47,6 @@ ) import pandas as pd -from pandas._typing import FilePathOrBuffer from pandas.core import algorithms, base, generic, nanops, ops from pandas.core.accessor import CachedAccessor from pandas.core.arrays import ExtensionArray, try_cast_to_ea @@ -60,6 +59,7 @@ is_empty_data, sanitize_array, ) +from pandas.core.generic import _shared_docs from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.api import ( @@ -1440,33 +1440,25 @@ def to_string( with open(buf, "w") as f: f.write(result) - def to_markdown( - self, - buf: Optional[FilePathOrBuffer[str]] = None, - mode: Optional[str] = None, - **kwargs, - ) -> str: + @Appender( """ - Print a Series in Markdown-friendly format. - - .. versionadded:: 1.0 - - Returns - ------- - str - Series in Markdown-friendly format. - Examples -------- - >>> s = pd.Series([1, 2, 3, 4]) + >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") >>> print(s.to_markdown()) - | | 0 | - |---:|----:| - | 0 | 1 | - | 1 | 2 | - | 2 | 3 | - | 3 | 4 | + | | animal | + |---:|:---------| + | 0 | elk | + | 1 | pig | + | 2 | dog | + | 3 | quetzal | """ + ) + @Substitution(klass="Series") + @Appender(_shared_docs["to_markdown"]) + def to_markdown( + self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs, + ) -> Optional[str]: return self.to_frame().to_markdown(buf, mode, **kwargs) # ---------------------------------------------------------------------- diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py index 3f2cefa46d01f..8893e4294353f 100644 --- a/pandas/tests/io/formats/test_to_markdown.py +++ b/pandas/tests/io/formats/test_to_markdown.py @@ -49,6 +49,7 @@ def test_series(): def test_no_buf(capsys): df = pd.DataFrame([1, 2, 3]) - df.to_markdown() - out, _ = capsys.readouterr() - assert out == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + result = df.to_markdown() + assert ( + result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) From 039e6a9f6e83f69e6282b7617d34059e2e950590 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 21 Dec 2019 18:54:53 +0000 Subject: [PATCH 21/24] :pushpin: pin tabulate at 0.8.3 --- ci/deps/travis-38.yaml | 2 +- doc/source/getting_started/install.rst | 2 +- environment.yml | 2 +- pandas/compat/_optional.py | 2 +- requirements-dev.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/deps/travis-38.yaml b/ci/deps/travis-38.yaml index d6cc670d5c7c5..a627b7edc175f 100644 --- a/ci/deps/travis-38.yaml +++ b/ci/deps/travis-38.yaml @@ -17,4 +17,4 @@ dependencies: - nomkl - pytz - pip - - tabulate==0.8.0 + - tabulate==0.8.3 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 188e35de67f20..2f38a39dedbf0 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -264,7 +264,7 @@ pyreadstat SPSS files (.sav) reading pytables 3.4.2 HDF5 reading / writing qtpy Clipboard I/O s3fs 0.3.0 Amazon S3 access -tabulate 0.8.0 Printing DataFrames and Series in Markdown-friendly format +tabulate 0.8.3 Printing DataFrames and Series in Markdown-friendly format xarray 0.8.2 pandas-like API for N-dimensional data xclip Clipboard I/O on linux xlrd 1.1.0 Excel reading diff --git a/environment.yml b/environment.yml index 40a2aaf332328..7119fb5ab1b9e 100644 --- a/environment.yml +++ b/environment.yml @@ -100,6 +100,6 @@ dependencies: - sqlalchemy # pandas.read_sql, DataFrame.to_sql - xarray # DataFrame.to_xarray - pyreadstat # pandas.read_spss - - tabulate>=0.8.0 # DataFrame.to_markdown + - tabulate>=0.8.3 # DataFrame.to_markdown - pip: - git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 9ab298c6274e3..c8cf639fcd15f 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -23,7 +23,7 @@ "scipy": "0.19.0", "sqlalchemy": "1.1.4", "tables": "3.4.2", - "tabulate": "0.8.0", + "tabulate": "0.8.3", "xarray": "0.8.2", "xlrd": "1.1.0", "xlwt": "1.2.0", diff --git a/requirements-dev.txt b/requirements-dev.txt index ad16f3c646bd8..a2c43bb6be73a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -67,5 +67,5 @@ s3fs sqlalchemy xarray pyreadstat -tabulate>=0.8.0 +tabulate>=0.8.3 git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master \ No newline at end of file From 093d63a2f9d3b146fa1b95a728554e542cb02b7b Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 23 Dec 2019 17:14:39 +0000 Subject: [PATCH 22/24] :pencil: add kwargs to parameters --- pandas/core/generic.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9f895c2a41d76..c4461a9530e5c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1984,6 +1984,9 @@ def _repr_data_resource_(self): sys.stdout. Pass a writable buffer if you need to further process the output. mode : str, optional + Mode in which file is opened. + **kwargs + These parameters will be passed to `tabulate`. Returns ------- From ec778165a69b79ffe6def5623c434db6888db18a Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 27 Dec 2019 08:48:55 +0000 Subject: [PATCH 23/24] :pencil: link to tabulate docs in install.rst --- doc/source/getting_started/install.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 2f38a39dedbf0..03514bf63d93c 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -234,7 +234,8 @@ Optional dependencies ~~~~~~~~~~~~~~~~~~~~~ Pandas has many optional dependencies that are only used for specific methods. -For example, :func:`pandas.read_hdf` requires the ``pytables`` package. If the +For example, :func:`pandas.read_hdf` requires the ``pytables`` package, while +:meth:`DataFrame.to_markdown` requires the ``tabulate`` package. If the optional dependency is not installed, pandas will raise an ``ImportError`` when the method requiring that dependency is called. @@ -264,7 +265,7 @@ pyreadstat SPSS files (.sav) reading pytables 3.4.2 HDF5 reading / writing qtpy Clipboard I/O s3fs 0.3.0 Amazon S3 access -tabulate 0.8.3 Printing DataFrames and Series in Markdown-friendly format +tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_) xarray 0.8.2 pandas-like API for N-dimensional data xclip Clipboard I/O on linux xlrd 1.1.0 Excel reading @@ -302,3 +303,4 @@ top-level :func:`~pandas.read_html` function: .. _html5lib: https://github.com/html5lib/html5lib-python .. _BeautifulSoup4: http://www.crummy.com/software/BeautifulSoup .. _lxml: http://lxml.de +.. _tabulate: https://github.com/astanin/python-tabulate From 32650f7e3d58b2466aa6b0b61123e6a706a954b6 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 27 Dec 2019 16:18:14 +0000 Subject: [PATCH 24/24] :pencil: fix merge conflict in whatsnew --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index f769689250d77..731c098a814d8 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -208,7 +208,7 @@ Other enhancements - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`) - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) - :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`) -- :meth:`DataFrame.to_markdown` added (:issue:`11052`) +- :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`) Build Changes