From 010f4ca0f0909f63e669256bdfb8faa8224b31e0 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 12 Sep 2021 19:22:48 +0200 Subject: [PATCH 1/3] add just base code files --- doc/source/reference/style.rst | 1 + pandas/io/formats/style.py | 44 +----- pandas/io/formats/style_render.py | 214 +++++++++++++++++++++++++++++- 3 files changed, 220 insertions(+), 39 deletions(-) diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst index 11d57e66c4773..e67813084e389 100644 --- a/doc/source/reference/style.rst +++ b/doc/source/reference/style.rst @@ -39,6 +39,7 @@ Style application Styler.apply_index Styler.applymap_index Styler.format + Styler.format_index Styler.hide_index Styler.hide_columns Styler.set_td_classes diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index c10ac07d452a8..f4cdb8e1d7173 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -32,7 +32,6 @@ import pandas as pd from pandas import ( - Index, IndexSlice, RangeIndex, ) @@ -58,6 +57,7 @@ Tooltips, maybe_convert_css_to_tuples, non_reducing_slice, + refactor_levels, ) try: @@ -1184,6 +1184,8 @@ def _copy(self, deepcopy: bool = False) -> Styler: ] deep = [ # nested lists or dicts "_display_funcs", + "_display_funcs_index", + "_display_funcs_columns", "hidden_rows", "hidden_columns", "ctx", @@ -1388,7 +1390,7 @@ def _apply_index( axis = self.data._get_axis_number(axis) obj = self.index if axis == 0 else self.columns - levels_ = _refactor_levels(level, obj) + levels_ = refactor_levels(level, obj) data = DataFrame(obj.to_list()).loc[:, levels_] if method == "apply": @@ -2206,7 +2208,7 @@ def hide_index( self.hide_index_names = True return self - levels_ = _refactor_levels(level, self.index) + levels_ = refactor_levels(level, self.index) self.hide_index_ = [ True if lev in levels_ else False for lev in range(self.index.nlevels) ] @@ -2345,7 +2347,7 @@ def hide_columns( self.hide_column_names = True return self - levels_ = _refactor_levels(level, self.columns) + levels_ = refactor_levels(level, self.columns) self.hide_columns_ = [ True if lev in levels_ else False for lev in range(self.columns.nlevels) ] @@ -3532,37 +3534,3 @@ def css_calc(x, left: float, right: float, align: str): index=data.index, columns=data.columns, ) - - -def _refactor_levels( - level: Level | list[Level] | None, - obj: Index, -) -> list[Level]: - """ - Returns a consistent levels arg for use in ``hide_index`` or ``hide_columns``. - - Parameters - ---------- - level : int, str, list - Original ``level`` arg supplied to above methods. - obj: - Either ``self.index`` or ``self.columns`` - - Returns - ------- - list : refactored arg with a list of levels to hide - """ - if level is None: - levels_: list[Level] = list(range(obj.nlevels)) - elif isinstance(level, int): - levels_ = [level] - elif isinstance(level, str): - levels_ = [obj._get_level_number(level)] - elif isinstance(level, list): - levels_ = [ - obj._get_level_number(lev) if not isinstance(lev, int) else lev - for lev in level - ] - else: - raise ValueError("`level` must be of type `int`, `str` or list of such") - return levels_ diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index e0f2e68306b40..0ec6a9b470b50 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -22,6 +22,7 @@ from pandas._config import get_option from pandas._libs import lib +from pandas._typing import Level from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.generic import ABCSeries @@ -113,7 +114,13 @@ def __init__( precision = ( get_option("styler.format.precision") if precision is None else precision ) - self._display_funcs: DefaultDict[ # maps (row, col) -> formatting function + self._display_funcs: DefaultDict[ # maps (row, col) -> format func + tuple[int, int], Callable[[Any], str] + ] = defaultdict(lambda: partial(_default_formatter, precision=precision)) + self._display_funcs_index: DefaultDict[ # maps (row, level) -> format func + tuple[int, int], Callable[[Any], str] + ] = defaultdict(lambda: partial(_default_formatter, precision=precision)) + self._display_funcs_columns: DefaultDict[ # maps (level, col) -> format func tuple[int, int], Callable[[Any], str] ] = defaultdict(lambda: partial(_default_formatter, precision=precision)) @@ -376,6 +383,7 @@ def _translate_header( f"{col_heading_class} level{r} col{c}", value, _is_visible(c, r, col_lengths), + display_value=self._display_funcs_columns[(r, c)](value), attributes=( f'colspan="{col_lengths.get((r, c), 0)}"' if col_lengths.get((r, c), 0) > 1 @@ -534,6 +542,7 @@ def _translate_body( f"{row_heading_class} level{c} row{r}", value, _is_visible(r, c, idx_lengths) and not self.hide_index_[c], + display_value=self._display_funcs_index[(r, c)](value), attributes=( f'rowspan="{idx_lengths.get((c, r), 0)}"' if idx_lengths.get((c, r), 0) > 1 @@ -833,6 +842,175 @@ def format( return self + def format_index( + self, + formatter: ExtFormatter | None = None, + axis: int | str = 0, + level: Level | list[Level] | None = None, + na_rep: str | None = None, + precision: int | None = None, + decimal: str = ".", + thousands: str | None = None, + escape: str | None = None, + ) -> StylerRenderer: + r""" + Format the text display value of index labels or column headers. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + formatter : str, callable, dict or None + Object to define how values are displayed. See notes. + axis : {0, "index", 1, "columns"} + Whether to apply the formatter to the index or column headers. + level : int, str, list + The level(s) over which to apply the generic formatter. + na_rep : str, optional + Representation for missing values. + If ``na_rep`` is None, no special formatting is applied. + precision : int, optional + Floating point precision to use for display purposes, if not determined by + the specified ``formatter``. + decimal : str, default "." + Character used as decimal separator for floats, complex and integers + thousands : str, optional, default None + Character used as thousands separator for floats, complex and integers + escape : str, optional + Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` + in cell display string with HTML-safe sequences. + Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, + ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with + LaTeX-safe sequences. + Escaping is done before ``formatter``. + + Returns + ------- + self : Styler + + Notes + ----- + This method assigns a formatting function, ``formatter``, to each level label + in the DataFrame's index or column headers. If ``formatter`` is ``None``, + then the default formatter is used. + If a callable then that function should take a label value as input and return + a displayable representation, such as a string. If ``formatter`` is + given as a string this is assumed to be a valid Python format specification + and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given, + keys should correspond to MultiIndex level numbers or names, and values should + be string or callable, as above. + + The default formatter currently expresses floats and complex numbers with the + pandas display precision unless using the ``precision`` argument here. The + default formatter does not adjust the representation of missing values unless + the ``na_rep`` argument is used. + + The ``level`` argument defines which levels of a MultiIndex to apply the + method to. If the ``formatter`` argument is given in dict form but does + not include all levels within the level argument then these unspecified levels + will have the default formatter applied. Any levels in the formatter dict + specifically excluded from the level argument will be ignored. + + When using a ``formatter`` string the dtypes must be compatible, otherwise a + `ValueError` will be raised. + + Examples + -------- + Using ``na_rep`` and ``precision`` with the default ``formatter`` + + >>> df = pd.DataFrame([[1, 2, 3]], columns=[2.0, np.nan, 4.0]]) + >>> df.style.format_index(axis=1, na_rep='MISS', precision=3) # doctest: +SKIP + 2.000 MISS 4.000 + 0 1 2 3 + + Using a ``formatter`` specification on consistent dtypes in a level + + >>> df.style.format_index('{:.2f}', axis=1, na_rep='MISS') # doctest: +SKIP + 2.00 MISS 4.00 + 0 1 2 3 + + Using the default ``formatter`` for unspecified levels + + >>> df = pd.DataFrame([[1, 2, 3]], + ... columns=pd.MultiIndex.from_arrays([["a", "a", "b"],[2, np.nan, 4]])) + >>> df.style.format_index({0: lambda v: upper(v)}, axis=1, precision=1) + ... # doctest: +SKIP + A B + 2.0 nan 4.0 + 0 1 2 3 + + Using a callable ``formatter`` function. + + >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT' + >>> df.style.format_index(func, axis=1, na_rep='MISS') + ... # doctest: +SKIP + STRING STRING + FLOAT MISS FLOAT + 0 1 2 3 + + Using a ``formatter`` with HTML ``escape`` and ``na_rep``. + + >>> df = pd.DataFrame([[1, 2, 3]], columns=['"A"', 'A&B', None]) + >>> s = df.style.format_index('$ {0}', axis=1, escape="html", na_rep="NA") + $ "A" + $ A&B + NA + ... + + Using a ``formatter`` with LaTeX ``escape``. + + >>> df = pd.DataFrame([[1, 2, 3]], columns=["123", "~", "$%#"]) + >>> df.style.format_index("\\textbf{{{}}}", escape="latex", axis=1).to_latex() + ... # doctest: +SKIP + \begin{tabular}{lrrr} + {} & {\textbf{123}} & {\textbf{\textasciitilde }} & {\textbf{\$\%\#}} \\ + 0 & 1 & 2 & 3 \\ + \end{tabular} + """ + axis = self.data._get_axis_number(axis) + if axis == 0: + display_funcs_, obj = self._display_funcs_index, self.index + else: + display_funcs_, obj = self._display_funcs_columns, self.columns + levels_ = refactor_levels(level, obj) + + if all( + ( + formatter is None, + level is None, + precision is None, + decimal == ".", + thousands is None, + na_rep is None, + escape is None, + ) + ): + display_funcs_.clear() + return self # clear the formatter / revert to default and avoid looping + + if not isinstance(formatter, dict): + formatter = {level: formatter for level in levels_} + else: + formatter = { + obj._get_level_number(level): formatter_ + for level, formatter_ in formatter.items() + } + + for lvl in levels_: + format_func = _maybe_wrap_formatter( + formatter.get(lvl), + na_rep=na_rep, + precision=precision, + decimal=decimal, + thousands=thousands, + escape=escape, + ) + + for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]: + display_funcs_[idx] = format_func + + return self + def _element( html_element: str, @@ -1172,6 +1350,40 @@ def maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: return style +def refactor_levels( + level: Level | list[Level] | None, + obj: Index, +) -> list[int]: + """ + Returns a consistent levels arg for use in ``hide_index`` or ``hide_columns``. + + Parameters + ---------- + level : int, str, list + Original ``level`` arg supplied to above methods. + obj: + Either ``self.index`` or ``self.columns`` + + Returns + ------- + list : refactored arg with a list of levels to hide + """ + if level is None: + levels_: list[int] = list(range(obj.nlevels)) + elif isinstance(level, int): + levels_ = [level] + elif isinstance(level, str): + levels_ = [obj._get_level_number(level)] + elif isinstance(level, list): + levels_ = [ + obj._get_level_number(lev) if not isinstance(lev, int) else lev + for lev in level + ] + else: + raise ValueError("`level` must be of type `int`, `str` or list of such") + return levels_ + + class Tooltips: """ An extension to ``Styler`` that allows for and manipulates tooltips on hover From ee6af2f22c3478880d34110e8cca6c0c038eaa77 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 12 Sep 2021 21:41:16 +0200 Subject: [PATCH 2/3] reduce --- doc/source/reference/style.rst | 1 - pandas/io/formats/style_render.py | 177 ------------------------------ 2 files changed, 178 deletions(-) diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst index e67813084e389..11d57e66c4773 100644 --- a/doc/source/reference/style.rst +++ b/doc/source/reference/style.rst @@ -39,7 +39,6 @@ Style application Styler.apply_index Styler.applymap_index Styler.format - Styler.format_index Styler.hide_index Styler.hide_columns Styler.set_td_classes diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 0ec6a9b470b50..a63396b5637bc 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -117,12 +117,6 @@ def __init__( self._display_funcs: DefaultDict[ # maps (row, col) -> format func tuple[int, int], Callable[[Any], str] ] = defaultdict(lambda: partial(_default_formatter, precision=precision)) - self._display_funcs_index: DefaultDict[ # maps (row, level) -> format func - tuple[int, int], Callable[[Any], str] - ] = defaultdict(lambda: partial(_default_formatter, precision=precision)) - self._display_funcs_columns: DefaultDict[ # maps (level, col) -> format func - tuple[int, int], Callable[[Any], str] - ] = defaultdict(lambda: partial(_default_formatter, precision=precision)) def _render_html( self, @@ -383,7 +377,6 @@ def _translate_header( f"{col_heading_class} level{r} col{c}", value, _is_visible(c, r, col_lengths), - display_value=self._display_funcs_columns[(r, c)](value), attributes=( f'colspan="{col_lengths.get((r, c), 0)}"' if col_lengths.get((r, c), 0) > 1 @@ -542,7 +535,6 @@ def _translate_body( f"{row_heading_class} level{c} row{r}", value, _is_visible(r, c, idx_lengths) and not self.hide_index_[c], - display_value=self._display_funcs_index[(r, c)](value), attributes=( f'rowspan="{idx_lengths.get((c, r), 0)}"' if idx_lengths.get((c, r), 0) > 1 @@ -842,175 +834,6 @@ def format( return self - def format_index( - self, - formatter: ExtFormatter | None = None, - axis: int | str = 0, - level: Level | list[Level] | None = None, - na_rep: str | None = None, - precision: int | None = None, - decimal: str = ".", - thousands: str | None = None, - escape: str | None = None, - ) -> StylerRenderer: - r""" - Format the text display value of index labels or column headers. - - .. versionadded:: 1.4.0 - - Parameters - ---------- - formatter : str, callable, dict or None - Object to define how values are displayed. See notes. - axis : {0, "index", 1, "columns"} - Whether to apply the formatter to the index or column headers. - level : int, str, list - The level(s) over which to apply the generic formatter. - na_rep : str, optional - Representation for missing values. - If ``na_rep`` is None, no special formatting is applied. - precision : int, optional - Floating point precision to use for display purposes, if not determined by - the specified ``formatter``. - decimal : str, default "." - Character used as decimal separator for floats, complex and integers - thousands : str, optional, default None - Character used as thousands separator for floats, complex and integers - escape : str, optional - Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` - in cell display string with HTML-safe sequences. - Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, - ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with - LaTeX-safe sequences. - Escaping is done before ``formatter``. - - Returns - ------- - self : Styler - - Notes - ----- - This method assigns a formatting function, ``formatter``, to each level label - in the DataFrame's index or column headers. If ``formatter`` is ``None``, - then the default formatter is used. - If a callable then that function should take a label value as input and return - a displayable representation, such as a string. If ``formatter`` is - given as a string this is assumed to be a valid Python format specification - and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given, - keys should correspond to MultiIndex level numbers or names, and values should - be string or callable, as above. - - The default formatter currently expresses floats and complex numbers with the - pandas display precision unless using the ``precision`` argument here. The - default formatter does not adjust the representation of missing values unless - the ``na_rep`` argument is used. - - The ``level`` argument defines which levels of a MultiIndex to apply the - method to. If the ``formatter`` argument is given in dict form but does - not include all levels within the level argument then these unspecified levels - will have the default formatter applied. Any levels in the formatter dict - specifically excluded from the level argument will be ignored. - - When using a ``formatter`` string the dtypes must be compatible, otherwise a - `ValueError` will be raised. - - Examples - -------- - Using ``na_rep`` and ``precision`` with the default ``formatter`` - - >>> df = pd.DataFrame([[1, 2, 3]], columns=[2.0, np.nan, 4.0]]) - >>> df.style.format_index(axis=1, na_rep='MISS', precision=3) # doctest: +SKIP - 2.000 MISS 4.000 - 0 1 2 3 - - Using a ``formatter`` specification on consistent dtypes in a level - - >>> df.style.format_index('{:.2f}', axis=1, na_rep='MISS') # doctest: +SKIP - 2.00 MISS 4.00 - 0 1 2 3 - - Using the default ``formatter`` for unspecified levels - - >>> df = pd.DataFrame([[1, 2, 3]], - ... columns=pd.MultiIndex.from_arrays([["a", "a", "b"],[2, np.nan, 4]])) - >>> df.style.format_index({0: lambda v: upper(v)}, axis=1, precision=1) - ... # doctest: +SKIP - A B - 2.0 nan 4.0 - 0 1 2 3 - - Using a callable ``formatter`` function. - - >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT' - >>> df.style.format_index(func, axis=1, na_rep='MISS') - ... # doctest: +SKIP - STRING STRING - FLOAT MISS FLOAT - 0 1 2 3 - - Using a ``formatter`` with HTML ``escape`` and ``na_rep``. - - >>> df = pd.DataFrame([[1, 2, 3]], columns=['"A"', 'A&B', None]) - >>> s = df.style.format_index('$ {0}', axis=1, escape="html", na_rep="NA") - $ "A" - $ A&B - NA - ... - - Using a ``formatter`` with LaTeX ``escape``. - - >>> df = pd.DataFrame([[1, 2, 3]], columns=["123", "~", "$%#"]) - >>> df.style.format_index("\\textbf{{{}}}", escape="latex", axis=1).to_latex() - ... # doctest: +SKIP - \begin{tabular}{lrrr} - {} & {\textbf{123}} & {\textbf{\textasciitilde }} & {\textbf{\$\%\#}} \\ - 0 & 1 & 2 & 3 \\ - \end{tabular} - """ - axis = self.data._get_axis_number(axis) - if axis == 0: - display_funcs_, obj = self._display_funcs_index, self.index - else: - display_funcs_, obj = self._display_funcs_columns, self.columns - levels_ = refactor_levels(level, obj) - - if all( - ( - formatter is None, - level is None, - precision is None, - decimal == ".", - thousands is None, - na_rep is None, - escape is None, - ) - ): - display_funcs_.clear() - return self # clear the formatter / revert to default and avoid looping - - if not isinstance(formatter, dict): - formatter = {level: formatter for level in levels_} - else: - formatter = { - obj._get_level_number(level): formatter_ - for level, formatter_ in formatter.items() - } - - for lvl in levels_: - format_func = _maybe_wrap_formatter( - formatter.get(lvl), - na_rep=na_rep, - precision=precision, - decimal=decimal, - thousands=thousands, - escape=escape, - ) - - for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]: - display_funcs_[idx] = format_func - - return self - def _element( html_element: str, From d6953e3db00acf36e92be98a2ba4919a7b1eba9a Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 12 Sep 2021 21:42:53 +0200 Subject: [PATCH 3/3] reduce --- pandas/io/formats/style.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index f4cdb8e1d7173..ed142017a066b 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1184,8 +1184,6 @@ def _copy(self, deepcopy: bool = False) -> Styler: ] deep = [ # nested lists or dicts "_display_funcs", - "_display_funcs_index", - "_display_funcs_columns", "hidden_rows", "hidden_columns", "ctx",