diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py index 11e43401f9395..249a8f3f556a1 100644 --- a/asv_bench/benchmarks/plotting.py +++ b/asv_bench/benchmarks/plotting.py @@ -1,5 +1,9 @@ +import importlib +import sys + import matplotlib import numpy as np +import pkg_resources from pandas import ( DataFrame, @@ -13,6 +17,8 @@ except ImportError: from pandas.tools.plotting import andrews_curves +from pandas.plotting._core import _get_plot_backend + matplotlib.use("Agg") @@ -99,4 +105,28 @@ def time_plot_andrews_curves(self): andrews_curves(self.df, "Name") +class BackendLoading: + repeat = 1 + number = 1 + warmup_time = 0 + + def setup(self): + dist = pkg_resources.get_distribution("pandas") + spec = importlib.machinery.ModuleSpec("my_backend", None) + mod = importlib.util.module_from_spec(spec) + mod.plot = lambda *args, **kwargs: 1 + + backends = pkg_resources.get_entry_map("pandas") + my_entrypoint = pkg_resources.EntryPoint( + "pandas_plotting_backend", mod.__name__, dist=dist + ) + backends["pandas_plotting_backends"][mod.__name__] = my_entrypoint + for i in range(10): + backends["pandas_plotting_backends"][str(i)] = my_entrypoint + sys.modules["my_backend"] = mod + + def time_get_plot_backend(self): + _get_plot_backend("my_backend") + + from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 409125b6d6691..a8ae36a1b17db 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -812,6 +812,7 @@ Performance improvements - Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`) - Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`) - Performance improvement in :meth:`DataFrame.transpose`, :meth:`Series.unstack` with ``DatetimeTZDtype`` (:issue:`40149`) +- Performance improvement in :meth:`Series.plot` and :meth:`DataFrame.plot` with entry point lazy loading (:issue:`41492`) .. --------------------------------------------------------------------------- diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 27f8835968b54..5d3db13610845 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,11 +1,14 @@ from __future__ import annotations import importlib +import types from typing import ( TYPE_CHECKING, Sequence, ) +import pkg_resources + from pandas._config import get_option from pandas._typing import IndexLabel @@ -865,7 +868,7 @@ def _get_call_args(backend_name, data, args, kwargs): if args and isinstance(data, ABCSeries): positional_args = str(args)[1:-1] keyword_args = ", ".join( - f"{name}={repr(value)}" for (name, default), value in zip(arg_def, args) + f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args) ) msg = ( "`Series.plot()` should not be called with positional " @@ -876,7 +879,7 @@ def _get_call_args(backend_name, data, args, kwargs): ) raise TypeError(msg) - pos_args = {name: value for value, (name, _) in zip(args, arg_def)} + pos_args = {name: value for (name, _), value in zip(arg_def, args)} if backend_name == "pandas.plotting._matplotlib": kwargs = dict(arg_def, **pos_args, **kwargs) else: @@ -1724,91 +1727,90 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs): return self(kind="hexbin", x=x, y=y, C=C, **kwargs) -_backends = {} +_backends: dict[str, types.ModuleType] = {} -def _find_backend(backend: str): +def _load_backend(backend: str) -> types.ModuleType: """ - Find a pandas plotting backend> + Load a pandas plotting backend. Parameters ---------- backend : str The identifier for the backend. Either an entrypoint item registered - with pkg_resources, or a module name. - - Notes - ----- - Modifies _backends with imported backends as a side effect. + with pkg_resources, "matplotlib", or a module name. Returns ------- types.ModuleType The imported backend. """ - import pkg_resources # Delay import for performance. + if backend == "matplotlib": + # Because matplotlib is an optional dependency and first-party backend, + # we need to attempt an import here to raise an ImportError if needed. + try: + module = importlib.import_module("pandas.plotting._matplotlib") + except ImportError: + raise ImportError( + "matplotlib is required for plotting when the " + 'default backend "matplotlib" is selected.' + ) from None + return module + + found_backend = False for entry_point in pkg_resources.iter_entry_points("pandas_plotting_backends"): - if entry_point.name == "matplotlib": - # matplotlib is an optional dependency. When - # missing, this would raise. - continue - _backends[entry_point.name] = entry_point.load() + found_backend = entry_point.name == backend + if found_backend: + module = entry_point.load() + break - try: - return _backends[backend] - except KeyError: + if not found_backend: # Fall back to unregistered, module name approach. try: module = importlib.import_module(backend) + found_backend = True except ImportError: # We re-raise later on. pass - else: - if hasattr(module, "plot"): - # Validate that the interface is implemented when the option - # is set, rather than at plot time. - _backends[backend] = module - return module + + if found_backend: + if hasattr(module, "plot"): + # Validate that the interface is implemented when the option is set, + # rather than at plot time. + return module raise ValueError( - f"Could not find plotting backend '{backend}'. Ensure that you've installed " - f"the package providing the '{backend}' entrypoint, or that the package has a " - "top-level `.plot` method." + f"Could not find plotting backend '{backend}'. Ensure that you've " + f"installed the package providing the '{backend}' entrypoint, or that " + "the package has a top-level `.plot` method." ) -def _get_plot_backend(backend=None): +def _get_plot_backend(backend: str | None = None): """ Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`). - The plotting system of pandas has been using matplotlib, but the idea here - is that it can also work with other third-party backends. In the future, - this function will return the backend from a pandas option, and all the - rest of the code in this file will use the backend specified there for the - plotting. + The plotting system of pandas uses matplotlib by default, but the idea here + is that it can also work with other third-party backends. This function + returns the module which provides a top-level `.plot` method that will + actually do the plotting. The backend is specified from a string, which + either comes from the keyword argument `backend`, or, if not specified, from + the option `pandas.options.plotting.backend`. All the rest of the code in + this file uses the backend specified there for the plotting. The backend is imported lazily, as matplotlib is a soft dependency, and pandas can be used without it being installed. + + Notes + ----- + Modifies `_backends` with imported backend as a side effect. """ backend = backend or get_option("plotting.backend") - if backend == "matplotlib": - # Because matplotlib is an optional dependency and first-party backend, - # we need to attempt an import here to raise an ImportError if needed. - try: - import pandas.plotting._matplotlib as module - except ImportError: - raise ImportError( - "matplotlib is required for plotting when the " - 'default backend "matplotlib" is selected.' - ) from None - - _backends["matplotlib"] = module - if backend in _backends: return _backends[backend] - module = _find_backend(backend) + module = _load_backend(backend) _backends[backend] = module return module