diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst index 1f296c0d6c088..f32a8adfd4d33 100644 --- a/doc/source/user_guide/options.rst +++ b/doc/source/user_guide/options.rst @@ -157,6 +157,22 @@ lines are replaced by an ellipsis. df pd.reset_option('max_rows') +Once the ``display.max_rows`` is exceeded, the ``display.min_rows`` option +determines how many rows are shown in the truncated repr. + +.. ipython:: python + + pd.set_option('max_rows', 8) + pd.set_option('min_rows', 4) + # below max_rows -> all rows shown + df = pd.DataFrame(np.random.randn(7, 2)) + df + # above max_rows -> only min_rows (4) rows shown + df = pd.DataFrame(np.random.randn(9, 2)) + df + pd.reset_option('max_rows') + pd.reset_option('min_rows') + ``display.expand_frame_repr`` allows for the representation of dataframes to stretch across pages, wrapped over the full column vs row-wise. @@ -352,8 +368,12 @@ display.max_rows 60 This sets the maximum numbe out various output. For example, this value determines whether the repr() for a dataframe prints out - fully or just a summary repr. + fully or just a truncated or summary repr. 'None' value means unlimited. +display.min_rows 10 The number of rows to show in a truncated + repr (when `max_rows` is exceeded). Ignored + when `max_rows` is set to None or 0. When set + to None, follows the value of `max_rows`. display.max_seq_items 100 when pretty-printing a long sequence, no more then `max_seq_items` will be printed. If items are omitted, diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index e9d23cfd8efc1..a83cabf82b424 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -134,6 +134,30 @@ than :attr:`options.display.max_seq_items` (default: 100 items). Horizontally, the output will truncate, if it's wider than :attr:`options.display.width` (default: 80 characters). +.. 
_whatsnew_0250.enhancements.shorter_truncated_repr: + +Shorter truncated repr for Series and DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Currently, the default display options of pandas ensure that when a Series +or DataFrame has more than 60 rows, its repr gets truncated to this maximum +of 60 rows (the ``display.max_rows`` option). However, this still gives +a repr that takes up a large part of the vertical screen estate. Therefore, +a new option ``display.min_rows`` is introduced with a default of 10 which +determines the number of rows shown in the truncated repr: + +- For small Series or DataFrames, up to ``max_rows`` number of rows is shown + (default: 60). +- For larger Series or DataFrame with a length above ``max_rows``, only + ``min_rows`` number of rows is shown (default: 10, i.e. the first and last + 5 rows). + +This dual option allows you to still see the full content of relatively small +objects (e.g. ``df.head(20)`` shows all 20 rows), while giving a brief repr +for large objects. + +To restore the previous behaviour of a single threshold, set +``pd.options.display.min_rows = None``. .. _whatsnew_0250.enhancements.other: diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 84ca154d045fe..31719ee0106fb 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -77,6 +77,13 @@ def use_numexpr_cb(key): correct auto-detection. """ +pc_min_rows_doc = """ +: int + The numbers of rows to show in a truncated view (when `max_rows` is + exceeded). Ignored when `max_rows` is set to None or 0. When set to + None, follows the value of `max_rows`. +""" + pc_max_cols_doc = """ : int If max_cols is exceeded, switch to truncate view. 
Depending on @@ -306,6 +313,8 @@ def is_terminal(): validator=is_instance_factory((int, type(None)))) cf.register_option('max_rows', 60, pc_max_rows_doc, validator=is_instance_factory([type(None), int])) + cf.register_option('min_rows', 10, pc_min_rows_doc, + validator=is_instance_factory([type(None), int])) cf.register_option('max_categories', 8, pc_max_categories_doc, validator=is_int) cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3ff3fff22f4f0..9294f1489a0e5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -588,14 +588,16 @@ def __repr__(self): return buf.getvalue() max_rows = get_option("display.max_rows") + min_rows = get_option("display.min_rows") max_cols = get_option("display.max_columns") show_dimensions = get_option("display.show_dimensions") if get_option("display.expand_frame_repr"): width, _ = console.get_console_size() else: width = None - self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols, - line_width=width, show_dimensions=show_dimensions) + self.to_string(buf=buf, max_rows=max_rows, min_rows=min_rows, + max_cols=max_cols, line_width=width, + show_dimensions=show_dimensions) return buf.getvalue() @@ -633,8 +635,8 @@ def _repr_html_(self): def to_string(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, - max_rows=None, max_cols=None, show_dimensions=False, - decimal='.', line_width=None): + max_rows=None, min_rows=None, max_cols=None, + show_dimensions=False, decimal='.', line_width=None): """ Render a DataFrame to a console-friendly tabular output. 
%(shared_params)s @@ -663,6 +665,7 @@ def to_string(self, buf=None, columns=None, col_space=None, header=True, sparsify=sparsify, justify=justify, index_names=index_names, header=header, index=index, + min_rows=min_rows, max_rows=max_rows, max_cols=max_cols, show_dimensions=show_dimensions, diff --git a/pandas/core/series.py b/pandas/core/series.py index 9179099562832..9a21fc86147f0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1486,17 +1486,20 @@ def __repr__(self): width, height = get_terminal_size() max_rows = (height if get_option("display.max_rows") == 0 else get_option("display.max_rows")) + min_rows = (height if get_option("display.max_rows") == 0 else + get_option("display.min_rows")) show_dimensions = get_option("display.show_dimensions") self.to_string(buf=buf, name=self.name, dtype=self.dtype, - max_rows=max_rows, length=show_dimensions) + min_rows=min_rows, max_rows=max_rows, + length=show_dimensions) result = buf.getvalue() return result def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, index=True, length=False, dtype=False, name=False, - max_rows=None): + max_rows=None, min_rows=None): """ Render a string representation of the Series. @@ -1522,6 +1525,9 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, max_rows : int, optional Maximum number of rows to show before truncating. If None, show all. + min_rows : int, optional + The number of rows to display in a truncated repr (when number + of rows is above `max_rows`). 
Returns ------- @@ -1533,6 +1539,7 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, header=header, index=index, dtype=dtype, na_rep=na_rep, float_format=float_format, + min_rows=min_rows, max_rows=max_rows) result = formatter.to_string() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3f98fc235b2c5..98c31fbeb78e6 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -79,6 +79,9 @@ * unset. max_rows : int, optional Maximum number of rows to display in the console. + min_rows : int, optional + The number of rows to display in the console in a truncated repr + (when number of rows is above `max_rows`). max_cols : int, optional Maximum number of columns to display in the console. show_dimensions : bool, default False @@ -159,7 +162,7 @@ class SeriesFormatter: def __init__(self, series, buf=None, length=True, header=True, index=True, na_rep='NaN', name=False, float_format=None, dtype=True, - max_rows=None): + max_rows=None, min_rows=None): self.series = series self.buf = buf if buf is not None else StringIO() self.name = name @@ -168,6 +171,7 @@ def __init__(self, series, buf=None, length=True, header=True, index=True, self.length = length self.index = index self.max_rows = max_rows + self.min_rows = min_rows if float_format is None: float_format = get_option("display.float_format") @@ -179,10 +183,17 @@ def __init__(self, series, buf=None, length=True, header=True, index=True, def _chk_truncate(self): from pandas.core.reshape.concat import concat + min_rows = self.min_rows max_rows = self.max_rows + # truncation determined by max_rows, actual truncated number of rows + # used below by min_rows truncate_v = max_rows and (len(self.series) > max_rows) series = self.series if truncate_v: + if min_rows: + # if min_rows is set (not None or 0), set max_rows to minimum + # of both + max_rows = min(min_rows, max_rows) if max_rows == 1: row_num = max_rows series = series.iloc[:max_rows] @@ -391,8 
+402,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, justify=None, float_format=None, sparsify=None, index_names=True, line_width=None, max_rows=None, - max_cols=None, show_dimensions=False, decimal='.', - table_id=None, render_links=False, **kwds): + min_rows=None, max_cols=None, show_dimensions=False, + decimal='.', table_id=None, render_links=False, **kwds): self.frame = frame if buf is not None: self.buf = _expand_user(_stringify_path(buf)) @@ -414,6 +425,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self.index = index self.line_width = line_width self.max_rows = max_rows + self.min_rows = min_rows self.max_cols = max_cols self.max_rows_displayed = min(max_rows or len(self.frame), len(self.frame)) @@ -471,6 +483,10 @@ def _chk_truncate(self): max_rows = h if not hasattr(self, 'max_rows_adj'): + if max_rows: + if (len(self.frame) > max_rows) and self.min_rows: + # if truncated, set max_rows showed to min_rows + max_rows = min(self.min_rows, max_rows) self.max_rows_adj = max_rows if not hasattr(self, 'max_cols_adj'): self.max_cols_adj = max_cols diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 0eeb0e6eb2f2d..7098a382cad45 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -377,6 +377,34 @@ def mkframe(n): printing.pprint_thing(df._repr_fits_horizontal_()) assert has_expanded_repr(df) + def test_repr_min_rows(self): + df = pd.DataFrame({'a': range(20)}) + + # default setting no truncation even if above min_rows + assert '..' not in repr(df) + + df = pd.DataFrame({'a': range(61)}) + + # default of max_rows 60 triggers truncation if above + assert '..' in repr(df) + + with option_context('display.max_rows', 10, 'display.min_rows', 4): + # truncated after first two rows + assert '..' 
in repr(df) + assert '2 ' not in repr(df) + + with option_context('display.max_rows', 12, 'display.min_rows', None): + # when set to None, follow value of max_rows + assert '5 5' in repr(df) + + with option_context('display.max_rows', 10, 'display.min_rows', 12): + # when set value higher as max_rows, use the minimum + assert '5 5' not in repr(df) + + with option_context('display.max_rows', None, 'display.min_rows', 12): + # max_rows of None -> never truncate + assert '..' not in repr(df) + def test_str_max_colwidth(self): # GH 7856 df = pd.DataFrame([{'a': 'foo', @@ -2284,6 +2312,34 @@ def test_show_dimensions(self): "display.show_dimensions", False): assert 'Length' not in repr(s) + def test_repr_min_rows(self): + s = pd.Series(range(20)) + + # default setting no truncation even if above min_rows + assert '..' not in repr(s) + + s = pd.Series(range(61)) + + # default of max_rows 60 triggers truncation if above + assert '..' in repr(s) + + with option_context('display.max_rows', 10, 'display.min_rows', 4): + # truncated after first two rows + assert '..' in repr(s) + assert '2 ' not in repr(s) + + with option_context('display.max_rows', 12, 'display.min_rows', None): + # when set to None, follow value of max_rows + assert '5 5' in repr(s) + + with option_context('display.max_rows', 10, 'display.min_rows', 12): + # when set value higher as max_rows, use the minimum + assert '5 5' not in repr(s) + + with option_context('display.max_rows', None, 'display.min_rows', 12): + # max_rows of None -> never truncate + assert '..' not in repr(s) + def test_to_string_name(self): s = Series(range(100), dtype='int64') s.name = 'myser'