diff --git a/doc/source/_static/print_df_new.png b/doc/source/_static/print_df_new.png new file mode 100644 index 0000000000000..767d7d3f0ef06 Binary files /dev/null and b/doc/source/_static/print_df_new.png differ diff --git a/doc/source/_static/print_df_old.png b/doc/source/_static/print_df_old.png new file mode 100644 index 0000000000000..5f458722f1269 Binary files /dev/null and b/doc/source/_static/print_df_old.png differ diff --git a/doc/source/options.rst b/doc/source/options.rst index a82be4d84bf3f..48247eb48baaf 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -78,8 +78,8 @@ with no argument ``describe_option`` will print out the descriptions for all ava Getting and Setting Options --------------------------- -As described above, :func:`~pandas.get_option` and :func:`~pandas.set_option` -are available from the pandas namespace. To change an option, call +As described above, :func:`~pandas.get_option` and :func:`~pandas.set_option` +are available from the pandas namespace. To change an option, call ``set_option('option regex', new_value)``. .. ipython:: python @@ -230,7 +230,7 @@ can specify the option ``df.info(null_counts=True)`` to override on showing a pa df.info() pd.reset_option('max_info_rows') -``display.precision`` sets the output display precision in terms of decimal places. +``display.precision`` sets the output display precision in terms of decimal places. This is only a suggestion. .. ipython:: python @@ -323,21 +323,21 @@ display.latex.multicolumn_format 'l' Alignment of multicolumn la display.latex.multirow False Combines rows when using a MultiIndex. Centered instead of top-aligned, separated by clines. -display.max_columns 20 max_rows and max_columns are used +display.max_columns 0 or 20 max_rows and max_columns are used in __repr__() methods to decide if to_string() or info() is used to render an object to a string. 
In - case python/IPython is running in - a terminal this can be set to 0 and + case Python/IPython is running in + a terminal this is set to 0 by default and pandas will correctly auto-detect - the width the terminal and swap to + the width of the terminal and switch to a smaller format in case all columns would not fit vertically. The IPython notebook, IPython qtconsole, or IDLE do not run in a terminal and hence it is not possible to do correct - auto-detection. 'None' value means - unlimited. + auto-detection, in which case the default + is set to 20. 'None' value means unlimited. display.max_colwidth 50 The maximum width in characters of a column in the repr of a pandas data structure. When the column overflows, @@ -402,9 +402,9 @@ display.html.table_schema False Whether to publish a Table display.html.border 1 A ``border=value`` attribute is inserted in the ``<table>`` tag for the DataFrame HTML repr. -display.html.use_mathjax True When True, Jupyter notebook will process - table contents using MathJax, rendering - mathematical expressions enclosed by the +display.html.use_mathjax True When True, Jupyter notebook will process + table contents using MathJax, rendering + mathematical expressions enclosed by the dollar symbol. io.excel.xls.writer xlwt The default Excel writer engine for 'xls' files. @@ -422,7 +422,7 @@ io.hdf.dropna_table True drop ALL nan rows when appe io.parquet.engine None The engine to use as a default for parquet reading and writing. If None then try 'pyarrow' and 'fastparquet' -mode.chained_assignment warn Controls ``SettingWithCopyWarning``: +mode.chained_assignment warn Controls ``SettingWithCopyWarning``: 'raise', 'warn', or None. Raise an exception, warn, or no action if trying to use :ref:`chained assignment <indexing.evaluation_order>`. 
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 107ce7855a00d..ced7bddcaa5b3 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -658,6 +658,35 @@ Notice in the example above that the converted ``Categorical`` has retained ``or Note that the unintenional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`). These bugs have been fixed in this release, and motivated changing the default value of ``ordered``. +.. _whatsnew_0230.api_breaking.pretty_printing: + +Better pretty-printing of DataFrames in a terminal +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, the default value for the maximum number of columns was +``pd.options.display.max_columns=20``. This meant that relatively wide data +frames would not fit within the terminal width, and pandas would introduce line +breaks to display these 20 columns. This resulted in an output that was +relatively difficult to read: + +.. image:: _static/print_df_old.png + +If Python runs in a terminal, the maximum number of columns is now determined +automatically so that the printed data frame fits within the current terminal +width (``pd.options.display.max_columns=0``) (:issue:`17023`). If Python runs +as a Jupyter kernel (such as the Jupyter QtConsole or a Jupyter notebook, as +well as in many IDEs), this value cannot be inferred automatically and is thus +set to ``20`` as in previous versions. In a terminal, this results in a much +nicer output: + +.. image:: _static/print_df_new.png + +Note that if you don't like the new default, you can always set this option +yourself. To revert to the old setting, you can run this line: + +.. code-block:: python + + pd.options.display.max_columns = 20 + .. 
_whatsnew_0230.api.datetimelike: Datetimelike API Changes diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 0edbf892172a9..b836a35b8cf29 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -13,6 +13,7 @@ from pandas.core.config import (is_int, is_bool, is_text, is_instance_factory, is_one_of_factory, is_callable) from pandas.io.formats.console import detect_console_encoding +from pandas.io.formats.terminal import is_terminal # compute @@ -314,7 +315,11 @@ def table_schema_cb(key): cf.register_option('max_categories', 8, pc_max_categories_doc, validator=is_int) cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int) - cf.register_option('max_columns', 20, pc_max_cols_doc, + if is_terminal(): + max_cols = 0 # automatically determine optimal number of columns + else: + max_cols = 20 # cannot determine optimal number of columns + cf.register_option('max_columns', max_cols, pc_max_cols_doc, validator=is_instance_factory([type(None), int])) cf.register_option('large_repr', 'truncate', pc_large_repr_doc, validator=is_one_of_factory(['truncate', 'info'])) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 1731dbb3ac68d..12201f62946ac 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -625,7 +625,8 @@ def to_string(self): max_len += size_tr_col # Need to make space for largest row # plus truncate dot col dif = max_len - self.w - adj_dif = dif + # '+ 1' to avoid too wide repr (GH PR #17023) + adj_dif = dif + 1 col_lens = Series([Series(ele).apply(len).max() for ele in strcols]) n_cols = len(col_lens) diff --git a/pandas/io/formats/terminal.py b/pandas/io/formats/terminal.py index 4bcb28fa59b86..07ab445182680 100644 --- a/pandas/io/formats/terminal.py +++ b/pandas/io/formats/terminal.py @@ -17,7 +17,7 @@ import sys import shutil -__all__ = ['get_terminal_size'] +__all__ = ['get_terminal_size', 'is_terminal'] def get_terminal_size(): @@ -48,6 +48,23 @@ def 
get_terminal_size(): return tuple_xy +def is_terminal(): + """ + Detect if Python is running in a terminal. + + Returns True if Python is running in a terminal or False if not. + """ + try: + ip = get_ipython() + except NameError: # assume standard Python interpreter in a terminal + return True + else: + if hasattr(ip, 'kernel'): # IPython as a Jupyter kernel + return False + else: # IPython in a terminal + return True + + def _get_terminal_size_windows(): res = None try: diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 90daa9aa882c8..152159965036d 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -875,10 +875,11 @@ def test_astype_str(self): columns=self.tzframe.columns) tm.assert_frame_equal(result, expected) - result = str(self.tzframe) - assert ('0 2013-01-01 2013-01-01 00:00:00-05:00 ' - '2013-01-01 00:00:00+01:00') in result - assert ('1 2013-01-02 ' - 'NaT NaT') in result - assert ('2 2013-01-03 2013-01-03 00:00:00-05:00 ' - '2013-01-03 00:00:00+01:00') in result + with option_context('display.max_columns', 20): + result = str(self.tzframe) + assert ('0 2013-01-01 2013-01-01 00:00:00-05:00 ' + '2013-01-01 00:00:00+01:00') in result + assert ('1 2013-01-02 ' + 'NaT NaT') in result + assert ('2 2013-01-03 2013-01-03 00:00:00-05:00 ' + '2013-01-03 00:00:00+01:00') in result diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 3e5aae10618e9..8fc6fef11798a 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -172,8 +172,8 @@ def test_repr_column_name_unicode_truncation_bug(self): 'the CSV file externally. 
I want to Call' ' the File through the code..')}) - result = repr(df) - assert 'StringCol' in result + with option_context('display.max_columns', 20): + assert 'StringCol' in repr(df) def test_latex_repr(self): result = r"""\begin{tabular}{llll} diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 6c3b75cdfa6df..ab9f61cffc16b 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -961,7 +961,8 @@ def test_pprint_thing(self): def test_wide_repr(self): with option_context('mode.sim_interactive', True, - 'display.show_dimensions', True): + 'display.show_dimensions', True, + 'display.max_columns', 20): max_cols = get_option('display.max_columns') df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) set_option('display.expand_frame_repr', False) @@ -979,7 +980,8 @@ def test_wide_repr(self): reset_option('display.expand_frame_repr') def test_wide_repr_wide_columns(self): - with option_context('mode.sim_interactive', True): + with option_context('mode.sim_interactive', True, + 'display.max_columns', 20): df = DataFrame(np.random.randn(5, 3), columns=['a' * 90, 'b' * 90, 'c' * 90]) rep_str = repr(df) @@ -987,7 +989,8 @@ def test_wide_repr_wide_columns(self): assert len(rep_str.splitlines()) == 20 def test_wide_repr_named(self): - with option_context('mode.sim_interactive', True): + with option_context('mode.sim_interactive', True, + 'display.max_columns', 20): max_cols = get_option('display.max_columns') df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) df.index.name = 'DataFrame Index' @@ -1008,7 +1011,8 @@ def test_wide_repr_named(self): reset_option('display.expand_frame_repr') def test_wide_repr_multiindex(self): - with option_context('mode.sim_interactive', True): + with option_context('mode.sim_interactive', True, + 'display.max_columns', 20): midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10))) max_cols = get_option('display.max_columns') df = 
DataFrame(tm.rands_array(25, size=(10, max_cols - 1)), @@ -1030,7 +1034,8 @@ def test_wide_repr_multiindex(self): reset_option('display.expand_frame_repr') def test_wide_repr_multiindex_cols(self): - with option_context('mode.sim_interactive', True): + with option_context('mode.sim_interactive', True, + 'display.max_columns', 20): max_cols = get_option('display.max_columns') midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10))) mcols = MultiIndex.from_arrays( @@ -1044,15 +1049,16 @@ def test_wide_repr_multiindex_cols(self): wide_repr = repr(df) assert rep_str != wide_repr - with option_context('display.width', 150): + with option_context('display.width', 150, 'display.max_columns', 20): wider_repr = repr(df) assert len(wider_repr) < len(wide_repr) reset_option('display.expand_frame_repr') def test_wide_repr_unicode(self): - with option_context('mode.sim_interactive', True): - max_cols = get_option('display.max_columns') + with option_context('mode.sim_interactive', True, + 'display.max_columns', 20): + max_cols = 20 df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) set_option('display.expand_frame_repr', False) rep_str = repr(df) @@ -1442,17 +1448,17 @@ def test_repr_html_mathjax(self): assert 'tex2jax_ignore' in df._repr_html_() def test_repr_html_wide(self): - max_cols = get_option('display.max_columns') + max_cols = 20 df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) - reg_repr = df._repr_html_() - assert "..." not in reg_repr + with option_context('display.max_rows', 60, 'display.max_columns', 20): + assert "..." not in df._repr_html_() wide_df = DataFrame(tm.rands_array(25, size=(10, max_cols + 1))) - wide_repr = wide_df._repr_html_() - assert "..." in wide_repr + with option_context('display.max_rows', 60, 'display.max_columns', 20): + assert "..." 
in wide_df._repr_html_() def test_repr_html_wide_multiindex_cols(self): - max_cols = get_option('display.max_columns') + max_cols = 20 mcols = MultiIndex.from_product([np.arange(max_cols // 2), ['foo', 'bar']], @@ -1467,8 +1473,8 @@ def test_repr_html_wide_multiindex_cols(self): names=['first', 'second']) df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols) - wide_repr = df._repr_html_() - assert '...' in wide_repr + with option_context('display.max_rows', 60, 'display.max_columns', 20): + assert '...' in df._repr_html_() def test_repr_html_long(self): with option_context('display.max_rows', 60): @@ -1512,14 +1518,15 @@ def test_repr_html_float(self): assert u('2 columns') in long_repr def test_repr_html_long_multiindex(self): - max_rows = get_option('display.max_rows') + max_rows = 60 max_L1 = max_rows // 2 tuples = list(itertools.product(np.arange(max_L1), ['foo', 'bar'])) idx = MultiIndex.from_tuples(tuples, names=['first', 'second']) df = DataFrame(np.random.randn(max_L1 * 2, 2), index=idx, columns=['A', 'B']) - reg_repr = df._repr_html_() + with option_context('display.max_rows', 60, 'display.max_columns', 20): + reg_repr = df._repr_html_() assert '...' not in reg_repr tuples = list(itertools.product(np.arange(max_L1 + 1), ['foo', 'bar'])) @@ -1530,20 +1537,22 @@ def test_repr_html_long_multiindex(self): assert '...' in long_repr def test_repr_html_long_and_wide(self): - max_cols = get_option('display.max_columns') - max_rows = get_option('display.max_rows') + max_cols = 20 + max_rows = 60 h, w = max_rows - 1, max_cols - 1 df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) - assert '...' not in df._repr_html_() + with option_context('display.max_rows', 60, 'display.max_columns', 20): + assert '...' not in df._repr_html_() h, w = max_rows + 1, max_cols + 1 df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) - assert '...' 
in df._repr_html_() + with option_context('display.max_rows', 60, 'display.max_columns', 20): + assert '...' in df._repr_html_() def test_info_repr(self): - max_rows = get_option('display.max_rows') - max_cols = get_option('display.max_columns') + max_rows = 60 + max_cols = 20 # Long h, w = max_rows + 1, max_cols - 1 df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) @@ -1555,7 +1564,8 @@ def test_info_repr(self): h, w = max_rows - 1, max_cols + 1 df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) assert has_horizontally_truncated_repr(df) - with option_context('display.large_repr', 'info'): + with option_context('display.large_repr', 'info', + 'display.max_columns', max_cols): assert has_info_repr(df) def test_info_repr_max_cols(self): @@ -1575,8 +1585,8 @@ def test_info_repr_max_cols(self): # fmt.set_option('display.max_info_columns', 4) # exceeded def test_info_repr_html(self): - max_rows = get_option('display.max_rows') - max_cols = get_option('display.max_columns') + max_rows = 60 + max_cols = 20 # Long h, w = max_rows + 1, max_cols - 1 df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) @@ -1588,7 +1598,8 @@ def test_info_repr_html(self): h, w = max_rows - 1, max_cols + 1 df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) assert '