Skip to content

Commit 874d9a7

Browse files
authored
ENH: set render limits on Styler to automatically trim dataframes (#41635)
1 parent 1131f8f commit 874d9a7

File tree

5 files changed

+208
-13
lines changed

5 files changed

+208
-13
lines changed

doc/source/user_guide/options.rst

+2
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,8 @@ styler.sparse.index True "Sparsify" MultiIndex displ
487487
elements in outer levels within groups).
488488
styler.sparse.columns True "Sparsify" MultiIndex display for columns
489489
in Styler output.
490+
styler.render.max_elements 262144 Maximum number of datapoints that Styler will render
491+
trimming either rows, columns or both to fit.
490492
======================================= ============ ==================================
491493

492494

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ properly format HTML and eliminate some inconsistencies (:issue:`39942` :issue:`
140140

141141
:class:`.Styler` has also been made compatible with non-unique index or columns, at least for as many features as are fully compatible, others made only partially compatible (:issue:`41269`).
142142
One also has greater control of the display through separate sparsification of the index or columns, using the new 'styler' options context (:issue:`41142`).
143+
Render trimming has also been added for large numbers of data elements to avoid browser overload (:issue:`40712`).
143144

144145
We have added an extension to allow LaTeX styling as an alternative to CSS styling and a method :meth:`.Styler.to_latex`
145146
which renders the necessary LaTeX format including built-up styles. An additional file io function :meth:`.Styler.to_html` has been added for convenience (:issue:`40312`).

pandas/core/config_init.py

+13
Original file line numberDiff line numberDiff line change
@@ -743,9 +743,22 @@ def register_converter_cb(key):
743743
display each explicit level element in a hierarchical key for each column.
744744
"""
745745

746+
styler_max_elements = """
747+
: int
748+
The maximum number of data-cell (<td>) elements that will be rendered before
749+
trimming will occur over columns, rows or both if needed.
750+
"""
751+
746752
with cf.config_prefix("styler"):
747753
cf.register_option("sparse.index", True, styler_sparse_index_doc, validator=bool)
748754

749755
cf.register_option(
750756
"sparse.columns", True, styler_sparse_columns_doc, validator=bool
751757
)
758+
759+
cf.register_option(
760+
"render.max_elements",
761+
2 ** 18,
762+
styler_max_elements,
763+
validator=is_nonnegative_int,
764+
)

pandas/io/formats/style_render.py

+147-9
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,8 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "&nbsp;
176176
ROW_HEADING_CLASS = "row_heading"
177177
COL_HEADING_CLASS = "col_heading"
178178
INDEX_NAME_CLASS = "index_name"
179+
TRIMMED_COL_CLASS = "col_trim"
180+
TRIMMED_ROW_CLASS = "row_trim"
179181

180182
DATA_CLASS = "data"
181183
BLANK_CLASS = "blank"
@@ -188,15 +190,34 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "&nbsp;
188190
"caption": self.caption,
189191
}
190192

193+
max_elements = get_option("styler.render.max_elements")
194+
max_rows, max_cols = _get_trimming_maximums(
195+
len(self.data.index), len(self.data.columns), max_elements
196+
)
197+
191198
head = self._translate_header(
192-
BLANK_CLASS, BLANK_VALUE, INDEX_NAME_CLASS, COL_HEADING_CLASS, sparse_cols
199+
BLANK_CLASS,
200+
BLANK_VALUE,
201+
INDEX_NAME_CLASS,
202+
COL_HEADING_CLASS,
203+
sparse_cols,
204+
max_cols,
205+
TRIMMED_COL_CLASS,
193206
)
194207
d.update({"head": head})
195208

196209
self.cellstyle_map: DefaultDict[tuple[CSSPair, ...], list[str]] = defaultdict(
197210
list
198211
)
199-
body = self._translate_body(DATA_CLASS, ROW_HEADING_CLASS, sparse_index)
212+
body = self._translate_body(
213+
DATA_CLASS,
214+
ROW_HEADING_CLASS,
215+
sparse_index,
216+
max_rows,
217+
max_cols,
218+
TRIMMED_ROW_CLASS,
219+
TRIMMED_COL_CLASS,
220+
)
200221
d.update({"body": body})
201222

202223
cellstyle: list[dict[str, CSSList | list[str]]] = [
@@ -227,6 +248,8 @@ def _translate_header(
227248
index_name_class: str,
228249
col_heading_class: str,
229250
sparsify_cols: bool,
251+
max_cols: int,
252+
trimmed_col_class: str,
230253
):
231254
"""
232255
Build each <tr> within table <head> as a list
@@ -252,6 +275,10 @@ def _translate_header(
252275
CSS class added to elements within the column_names section of structure.
253276
sparsify_cols : bool
254277
Whether column_headers section will add colspan attributes (>1) to elements.
278+
max_cols : int
279+
Maximum number of columns to render. If exceeded will contain `...` filler.
280+
trimmed_col_class : str
281+
CSS class added to elements within a column including `...` trimmed vals.
255282
256283
Returns
257284
-------
@@ -260,10 +287,10 @@ def _translate_header(
260287
"""
261288
# for sparsifying a MultiIndex
262289
col_lengths = _get_level_lengths(
263-
self.columns, sparsify_cols, self.hidden_columns
290+
self.columns, sparsify_cols, max_cols, self.hidden_columns
264291
)
265292

266-
clabels = self.data.columns.tolist()
293+
clabels = self.data.columns.tolist()[:max_cols] # slice to allow trimming
267294
if self.data.columns.nlevels == 1:
268295
clabels = [[x] for x in clabels]
269296
clabels = list(zip(*clabels))
@@ -300,6 +327,18 @@ def _translate_header(
300327
)
301328
for c, value in enumerate(clabels[r])
302329
]
330+
331+
if len(self.data.columns) > max_cols:
332+
# add an extra column with `...` value to indicate trimming
333+
column_headers.append(
334+
_element(
335+
"th",
336+
f"{col_heading_class} level{r} {trimmed_col_class}",
337+
"...",
338+
True,
339+
attributes="",
340+
)
341+
)
303342
head.append(index_blanks + column_name + column_headers)
304343

305344
# 2) index names
@@ -318,21 +357,33 @@ def _translate_header(
318357
for c, name in enumerate(self.data.index.names)
319358
]
320359

360+
if len(self.data.columns) <= max_cols:
361+
blank_len = len(clabels[0])
362+
else:
363+
blank_len = len(clabels[0]) + 1 # to allow room for `...` trim col
364+
321365
column_blanks = [
322366
_element(
323367
"th",
324368
f"{blank_class} col{c}",
325369
blank_value,
326370
c not in self.hidden_columns,
327371
)
328-
for c in range(len(clabels[0]))
372+
for c in range(blank_len)
329373
]
330374
head.append(index_names + column_blanks)
331375

332376
return head
333377

334378
def _translate_body(
335-
self, data_class: str, row_heading_class: str, sparsify_index: bool
379+
self,
380+
data_class: str,
381+
row_heading_class: str,
382+
sparsify_index: bool,
383+
max_rows: int,
384+
max_cols: int,
385+
trimmed_row_class: str,
386+
trimmed_col_class: str,
336387
):
337388
"""
338389
Build each <tr> within table <body> as a list
@@ -360,14 +411,52 @@ def _translate_body(
360411
The associated HTML elements needed for template rendering.
361412
"""
362413
# for sparsifying a MultiIndex
363-
idx_lengths = _get_level_lengths(self.index, sparsify_index)
414+
idx_lengths = _get_level_lengths(self.index, sparsify_index, max_rows)
364415

365-
rlabels = self.data.index.tolist()
416+
rlabels = self.data.index.tolist()[:max_rows] # slice to allow trimming
366417
if self.data.index.nlevels == 1:
367418
rlabels = [[x] for x in rlabels]
368419

369420
body = []
370421
for r, row_tup in enumerate(self.data.itertuples()):
422+
if r >= max_rows: # used only to add a '...' trimmed row:
423+
index_headers = [
424+
_element(
425+
"th",
426+
f"{row_heading_class} level{c} {trimmed_row_class}",
427+
"...",
428+
not self.hidden_index,
429+
attributes="",
430+
)
431+
for c in range(self.data.index.nlevels)
432+
]
433+
434+
data = [
435+
_element(
436+
"td",
437+
f"{data_class} col{c} {trimmed_row_class}",
438+
"...",
439+
(c not in self.hidden_columns),
440+
attributes="",
441+
)
442+
for c in range(max_cols)
443+
]
444+
445+
if len(self.data.columns) > max_cols:
446+
# columns are also trimmed so we add the final element
447+
data.append(
448+
_element(
449+
"td",
450+
f"{data_class} {trimmed_row_class} {trimmed_col_class}",
451+
"...",
452+
True,
453+
attributes="",
454+
)
455+
)
456+
457+
body.append(index_headers + data)
458+
break
459+
371460
index_headers = [
372461
_element(
373462
"th",
@@ -386,6 +475,18 @@ def _translate_body(
386475

387476
data = []
388477
for c, value in enumerate(row_tup[1:]):
478+
if c >= max_cols:
479+
data.append(
480+
_element(
481+
"td",
482+
f"{data_class} row{r} {trimmed_col_class}",
483+
"...",
484+
True,
485+
attributes="",
486+
)
487+
)
488+
break
489+
389490
# add custom classes from cell context
390491
cls = ""
391492
if (r, c) in self.cell_context:
@@ -655,8 +756,40 @@ def _element(
655756
}
656757

657758

759+
def _get_trimming_maximums(rn, cn, max_elements, scaling_factor=0.8):
760+
"""
761+
Recursively reduce the number of rows and columns to satisfy max elements.
762+
763+
Parameters
764+
----------
765+
rn, cn : int
766+
The number of input rows / columns
767+
max_elements : int
768+
The number of allowable elements
769+
770+
Returns
771+
-------
772+
rn, cn : tuple
773+
New rn and cn values that satisfy the max_elements constraint
774+
"""
775+
776+
def scale_down(rn, cn):
777+
if cn >= rn:
778+
return rn, int(cn * scaling_factor)
779+
else:
780+
return int(rn * scaling_factor), cn
781+
782+
while rn * cn > max_elements:
783+
rn, cn = scale_down(rn, cn)
784+
785+
return rn, cn
786+
787+
658788
def _get_level_lengths(
659-
index: Index, sparsify: bool, hidden_elements: Sequence[int] | None = None
789+
index: Index,
790+
sparsify: bool,
791+
max_index: int,
792+
hidden_elements: Sequence[int] | None = None,
660793
):
661794
"""
662795
Given an index, find the level length for each element.
@@ -667,6 +800,8 @@ def _get_level_lengths(
667800
Index or columns to determine lengths of each element
668801
sparsify : bool
669802
Whether to hide or show each distinct element in a MultiIndex
803+
max_index : int
804+
The maximum number of elements to analyse along the index due to trimming
670805
hidden_elements : sequence of int
671806
Index positions of elements hidden from display in the index affecting
672807
length
@@ -693,6 +828,9 @@ def _get_level_lengths(
693828

694829
for i, lvl in enumerate(levels):
695830
for j, row in enumerate(lvl):
831+
if j >= max_index:
832+
# stop the loop due to display trimming
833+
break
696834
if not sparsify:
697835
lengths[(i, j)] = 1
698836
elif (row is not lib.no_default) and (j not in hidden_elements):

pandas/tests/io/formats/style/test_style.py

+45-4
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
)
1818
from pandas.io.formats.style_render import (
1919
_get_level_lengths,
20+
_get_trimming_maximums,
2021
maybe_convert_css_to_tuples,
2122
non_reducing_slice,
2223
)
@@ -115,6 +116,46 @@ def test_mi_styler_sparsify_options(mi_styler):
115116
assert html1 != html2
116117

117118

119+
def test_trimming_maximum():
# Check the row / column limits computed by _get_trimming_maximums.
120+
# square input: a 100 x 100 table limited to 100 elements, halving per step
rn, cn = _get_trimming_maximums(100, 100, 100, scaling_factor=0.5)
121+
assert (rn, cn) == (12, 6)
122+
123+
# tall input: only the (larger) row count is reduced, columns stay at 3
rn, cn = _get_trimming_maximums(1000, 3, 750, scaling_factor=0.5)
124+
assert (rn, cn) == (250, 3)
125+
126+
127+
def test_render_trimming():
# Check the shape of the translated head / body when max_elements forces trimming.
128+
# 60 x 2 frame: only rows need trimming to fit 6 elements
df = DataFrame(np.arange(120).reshape(60, 2))
129+
with pd.option_context("styler.render.max_elements", 6):
130+
ctx = df.style._translate(True, True)
131+
assert len(ctx["head"][0]) == 3 # index + 2 data cols
132+
assert len(ctx["body"]) == 4 # 3 data rows + trimming row
133+
assert len(ctx["body"][0]) == 3 # index + 2 data cols
134+
135+
# 12 x 10 frame: both rows and columns need trimming to fit 6 elements
df = DataFrame(np.arange(120).reshape(12, 10))
136+
with pd.option_context("styler.render.max_elements", 6):
137+
ctx = df.style._translate(True, True)
138+
assert len(ctx["head"][0]) == 4 # index + 2 data cols + trimming col
139+
assert len(ctx["body"]) == 4 # 3 data rows + trimming row
140+
assert len(ctx["body"][0]) == 4 # index + 2 data cols + trimming col
141+
142+
143+
def test_render_trimming_mi():
# Check trimming of a frame whose index and columns are both 2-level MultiIndexes.
144+
midx = MultiIndex.from_product([[1, 2], [1, 2, 3]])
145+
df = DataFrame(np.arange(36).reshape(6, 6), columns=midx, index=midx)
146+
with pd.option_context("styler.render.max_elements", 4):
147+
ctx = df.style._translate(True, True)
148+
149+
assert len(ctx["body"][0]) == 5 # 2 indexes + 2 data cols + trimming col
150+
# the sparsified outer index level spans only the 2 rows kept after trimming
assert {"attributes": 'rowspan="2"'}.items() <= ctx["body"][0][0].items()
151+
assert {"class": "data row0 col_trim"}.items() <= ctx["body"][0][4].items()
152+
assert {"class": "data row_trim col_trim"}.items() <= ctx["body"][2][4].items()
153+
assert len(ctx["body"]) == 3 # 2 data rows + trimming row
154+
155+
assert len(ctx["head"][0]) == 5 # 2 indexes + 2 column headers + trimming col
156+
# the sparsified outer column level spans only the 2 columns kept after trimming
assert {"attributes": 'colspan="2"'}.items() <= ctx["head"][0][2].items()
157+
158+
118159
class TestStyler:
119160
def setup_method(self, method):
120161
np.random.seed(24)
@@ -939,7 +980,7 @@ def test_get_level_lengths(self):
939980
(1, 4): 1,
940981
(1, 5): 1,
941982
}
942-
result = _get_level_lengths(index, sparsify=True)
983+
result = _get_level_lengths(index, sparsify=True, max_index=100)
943984
tm.assert_dict_equal(result, expected)
944985

945986
expected = {
@@ -956,7 +997,7 @@ def test_get_level_lengths(self):
956997
(1, 4): 1,
957998
(1, 5): 1,
958999
}
959-
result = _get_level_lengths(index, sparsify=False)
1000+
result = _get_level_lengths(index, sparsify=False, max_index=100)
9601001
tm.assert_dict_equal(result, expected)
9611002

9621003
def test_get_level_lengths_un_sorted(self):
@@ -970,7 +1011,7 @@ def test_get_level_lengths_un_sorted(self):
9701011
(1, 2): 1,
9711012
(1, 3): 1,
9721013
}
973-
result = _get_level_lengths(index, sparsify=True)
1014+
result = _get_level_lengths(index, sparsify=True, max_index=100)
9741015
tm.assert_dict_equal(result, expected)
9751016

9761017
expected = {
@@ -983,7 +1024,7 @@ def test_get_level_lengths_un_sorted(self):
9831024
(1, 2): 1,
9841025
(1, 3): 1,
9851026
}
986-
result = _get_level_lengths(index, sparsify=False)
1027+
result = _get_level_lengths(index, sparsify=False, max_index=100)
9871028
tm.assert_dict_equal(result, expected)
9881029

9891030
def test_mi_sparse_index_names(self):

0 commit comments

Comments
 (0)