From 6ae52d4e1a1da2b0b7db6cf7274901f44e98db83 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 8 Dec 2017 14:45:57 -0600 Subject: [PATCH 01/30] ENH: Added public accessor registrar Adds new methods for registering custom accessors to pandas objects. This will be helpful for implementing https://github.com/pandas-dev/pandas/issues/18767 outside of pandas. Closes https://github.com/pandas-dev/pandas/issues/14781 --- LICENSES/XARRAY_LICENSE | 191 +++++++++++++++++++++++++ doc/source/internals.rst | 42 ++++++ doc/source/whatsnew/v0.22.0.txt | 44 ++++++ pandas/core/accessor.py | 111 ++++++++++++++ pandas/core/api.py | 3 + pandas/errors/__init__.py | 4 + pandas/tests/api/test_api.py | 5 +- pandas/tests/test_register_accessor.py | 75 ++++++++++ 8 files changed, 474 insertions(+), 1 deletion(-) create mode 100644 LICENSES/XARRAY_LICENSE create mode 100644 pandas/tests/test_register_accessor.py diff --git a/LICENSES/XARRAY_LICENSE b/LICENSES/XARRAY_LICENSE new file mode 100644 index 0000000000000..37ec93a14fdcd --- /dev/null +++ b/LICENSES/XARRAY_LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. 
+ +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. 
For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. 
If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. 
You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/doc/source/internals.rst b/doc/source/internals.rst index 3d96b93de4cc9..1be3d16681034 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -89,6 +89,46 @@ not check (or care) whether the levels themselves are sorted. Fortunately, the constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but if you compute the levels and labels yourself, please be careful. +.. _register-accessors: + +Registering Custom Accessors +---------------------------- + +Libraries can use the decorators :func:`register_dataframe_accessor`, +:func:`register_series_accessor`, and :func:`register_index_accessor`, to add +additional "namespaces" to pandas objects. All of these follow a similar +convention: you decorate a class, providing the name of attribute to add. The +class's `__init__` method gets the object being decorated. For example: + +.. 
ipython:: python + + @pd.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lon = self._obj.longitude + lat = self._obj.latitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Now users can access your methods using the `geo` namespace: + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +This can be a convenient way to extend pandas objects without subclassing them. + .. _ref-subclassing-pandas: Subclassing pandas Data Structures @@ -100,6 +140,8 @@ Subclassing pandas Data Structures 2. Use *composition*. See `here `_. + 3. Extending by :ref:`registering an accessor <register-accessors>` + This section describes how to subclass ``pandas`` data structures to meet more specific needs. There are 2 points which need attention: 1. Override constructor properties. diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index ae6d0816abc41..f1224714634f5 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -119,6 +119,50 @@ Current Behavior s.rank(na_option='top') +Extending Pandas Objects with New Accessors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas uses accessors to group together many related methods into a namespace on +``Series`` or ``Index`` objects. For example, ``Series.str`` for string methods, +or ``Series.dt`` for datetime methods. Inspired by xarray, pandas now officially +supports registering custom accessors in library code. + + +.. 
code-block:: python + + import pandas as pd + + @pd.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lon = self._obj.longitude + lat = self._obj.latitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Back in an interactive IPython session: + +.. code-block:: python + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +This provides a convenient alternative to subclassing or composition. +If you write a custom accessor, make a pull request adding it to our +:ref:`ecosystem` page. + .. _whatsnew_0220.enhancements.other: Other Enhancements diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 53ead5e8f74a3..526825fdedc3b 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -5,7 +5,12 @@ that can be mixed into or pinned onto other pandas classes. 
""" +import traceback +import warnings + from pandas.core.common import AbstractMethodError +from pandas.compat import PY2 +from pandas.errors import AccessorRegistrationWarning class DirNamesMixin(object): @@ -129,3 +134,109 @@ def f(self, *args, **kwargs): # don't overwrite existing methods/properties if overwrite or not hasattr(cls, name): setattr(cls, name, f) + + +# Ported with modifications from xarray +# https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py + + +class _CachedAccessor(object): + """Custom property-like object (descriptor) for caching accessors.""" + def __init__(self, name, accessor): + self._name = name + self._accessor = accessor + + def __get__(self, obj, cls): + if obj is None: + # we're accessing the attribute of the class, i.e., Dataset.geo + return self._accessor + try: + accessor_obj = self._accessor(obj) + except AttributeError: + # TODO + # __getattr__ on data object will swallow any AttributeErrors + # raised when initializing the accessor, so we need to raise + # as something else (GH933): + msg = 'error initializing %r accessor.' % self._name + if PY2: + msg += ' Full traceback:\n' + traceback.format_exc() + raise RuntimeError(msg) + # Replace the property with the accessor object. Inspired by: + # http://www.pydanny.com/cached-property.html + # We need to use object.__setattr__ because we overwrite __setattr__ on + # AttrAccessMixin. + object.__setattr__(obj, self._name, accessor_obj) + return accessor_obj + + +def _register_accessor(name, cls): + def decorator(accessor): + if hasattr(cls, name): + warnings.warn( + 'registration of accessor {!r} under name {!r} for type ' + '{!r} is overriding a preexisting attribute with the same ' + 'name.'.format(accessor, name, cls), + AccessorRegistrationWarning, + stacklevel=2) + setattr(cls, name, _CachedAccessor(name, accessor)) + return accessor + return decorator + + +def register_dataframe_accessor(name): + """Register a custom accessor on pandas.DataFrame objects. 
+ + Parameters + ---------- + name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. + + Examples + -------- + + In your library code:: + + import pandas as pd + + @pd.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lon = self._obj.longitude + lat = self._obj.latitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + + Back in an interactive IPython session: + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + + See also + -------- + register_series_accessor + register_index_accessor + """ + from pandas import DataFrame + return _register_accessor(name, DataFrame) + + +def register_series_accessor(name): + from pandas import Series + return _register_accessor(name, Series) + + +def register_index_accessor(name): + from pandas import Index + return _register_accessor(name, Index) diff --git a/pandas/core/api.py b/pandas/core/api.py index 8a624da362976..597ed29709d18 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -5,6 +5,9 @@ import numpy as np from pandas.core.algorithms import factorize, unique, value_counts +from pandas.core.accessor import (register_dataframe_accessor, + register_index_accessor, + register_series_accessor) from pandas.core.dtypes.missing import isna, isnull, notna, notnull from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 42b3bdd4991a9..b4026336c1ded 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -65,3 +65,7 @@ class MergeError(ValueError): Error 
raised when problems arise during merging due to problems with input data. Subclass of `ValueError`. """ + + +class AccessorRegistrationWarning(Warning): + """Warning for attribute conflicts in accessor registration.""" \ No newline at end of file diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index e47f1919faaf5..a353f6df7cec1 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -70,7 +70,10 @@ class TestPDApi(Base): 'period_range', 'pivot', 'pivot_table', 'qcut', 'show_versions', 'timedelta_range', 'unique', - 'value_counts', 'wide_to_long'] + 'value_counts', 'wide_to_long', + 'register_series_accessor', + 'register_dataframe_accessor', + 'register_index_accessor'] # top-level option funcs funcs_option = ['reset_option', 'describe_option', 'get_option', diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py new file mode 100644 index 0000000000000..f7564aaa8daa9 --- /dev/null +++ b/pandas/tests/test_register_accessor.py @@ -0,0 +1,75 @@ +import contextlib + +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.errors import AccessorRegistrationWarning + + +@contextlib.contextmanager +def ensure_removed(obj, attr): + """Ensure that an attribute added to 'obj' during the test is + removed when we're done""" + try: + yield + finally: + try: + delattr(obj, attr) + except AttributeError: + pass + + +class MyAccessor(object): + + def __init__(self, obj): + self.obj = obj + self.item = 'item' + + @property + def prop(self): + return self.item + + def method(self): + return self.item + + +@pytest.mark.parametrize('obj, registrar', [ + (pd.Series, pd.register_series_accessor), + (pd.DataFrame, pd.register_dataframe_accessor), + (pd.Index, pd.register_index_accessor) +]) +def test_series_register(obj, registrar): + with ensure_removed(obj, 'mine'): + before = set(dir(obj)) + registrar('mine')(MyAccessor) + assert obj([]).mine.prop == 'item' + after = 
set(dir(obj)) + assert (before ^ after) == {'mine'} + + +def test_accessor_works(): + with ensure_removed(pd.Series, 'mine'): + pd.register_series_accessor('mine')(MyAccessor) + + s = pd.Series([1, 2]) + assert s.mine.obj is s + + assert s.mine.prop == 'item' + assert s.mine.method() == 'item' + + +def test_overwrite_warns(): + # Need to restore mean + mean = pd.Series.mean + try: + with tm.assert_produces_warning(AccessorRegistrationWarning) as w: + pd.register_series_accessor('mean')(MyAccessor) + s = pd.Series([1, 2]) + assert s.mean.prop == 'item' + msg = str(w[0].message) + assert 'mean' in msg + assert 'MyAccessor' in msg + assert 'Series' in msg + finally: + pd.Series.mean = mean From 998bb28aae5edc3ba6832c066f1dc7bca9ff7323 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 18 Dec 2017 15:17:38 -0600 Subject: [PATCH 02/30] PEP8 --- pandas/errors/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index b4026336c1ded..c849cfeffc2d7 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -68,4 +68,4 @@ class MergeError(ValueError): class AccessorRegistrationWarning(Warning): - """Warning for attribute conflicts in accessor registration.""" \ No newline at end of file + """Warning for attribute conflicts in accessor registration.""" From 9b20a5cd8aa5e5167ce2f4e2eeaf75aa5afdfa07 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 19 Dec 2017 09:57:18 -0600 Subject: [PATCH 03/30] Moved to extensions --- doc/source/internals.rst | 9 +++++---- pandas/__init__.py | 1 + pandas/core/api.py | 3 --- pandas/extensions/__init__.py | 4 ++++ pandas/tests/api/test_api.py | 15 ++++++++++----- pandas/tests/test_register_accessor.py | 10 +++++----- 6 files changed, 25 insertions(+), 17 deletions(-) create mode 100644 pandas/extensions/__init__.py diff --git a/doc/source/internals.rst b/doc/source/internals.rst index 1be3d16681034..e6d07d148b601 100644 --- 
a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -94,10 +94,11 @@ if you compute the levels and labels yourself, please be careful. Registering Custom Accessors ---------------------------- -Libraries can use the decorators :func:`register_dataframe_accessor`, -:func:`register_series_accessor`, and :func:`register_index_accessor`, to add -additional "namespaces" to pandas objects. All of these follow a similar -convention: you decorate a class, providing the name of attribute to add. The +Libraries can use the decorators +:func:`pandas.extensions.register_dataframe_accessor`, +:func:`pandas.extensions.register_series_accessor`, and +:func:`pandas.extensions.register_index_accessor`, to add additional "namespaces" to +pandas objects. All of these follow a similar convention: you decorate a class, providing the name of attribute to add. The class's `__init__` method gets the object being decorated. For example: .. ipython:: python diff --git a/pandas/__init__.py b/pandas/__init__.py index 8d9b75ccd6c2c..8fd824fc45de2 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -57,6 +57,7 @@ from pandas.util._print_versions import show_versions from pandas.io.api import * from pandas.util._tester import test +from pandas.extensions import * import pandas.testing # extension module deprecations diff --git a/pandas/core/api.py b/pandas/core/api.py index 597ed29709d18..8a624da362976 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -5,9 +5,6 @@ import numpy as np from pandas.core.algorithms import factorize, unique, value_counts -from pandas.core.accessor import (register_dataframe_accessor, - register_index_accessor, - register_series_accessor) from pandas.core.dtypes.missing import isna, isnull, notna, notnull from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper diff --git a/pandas/extensions/__init__.py b/pandas/extensions/__init__.py new file mode 100644 index 0000000000000..64f5e8fb939a4 --- /dev/null +++ 
b/pandas/extensions/__init__.py @@ -0,0 +1,4 @@ +"""Public API for extending pandas objects.""" +from pandas.core.accessor import (register_dataframe_accessor, # noqa + register_index_accessor, + register_series_accessor) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index a353f6df7cec1..68fc8d67f0486 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -30,7 +30,7 @@ class TestPDApi(Base): ignored = ['tests', 'locale', 'conftest'] # top-level sub-packages - lib = ['api', 'compat', 'core', 'errors', 'pandas', + lib = ['api', 'compat', 'core', 'errors', 'extensions', 'pandas', 'plotting', 'test', 'testing', 'tools', 'tseries', 'util', 'options', 'io'] @@ -41,6 +41,13 @@ class TestPDApi(Base): # misc misc = ['IndexSlice', 'NaT'] + # extension points + extensions = [ + 'register_dataframe_accessor', + 'register_index_accessor', + 'register_series_accessor', + ] + # top-level classes classes = ['Categorical', 'CategoricalIndex', 'DataFrame', 'DateOffset', 'DatetimeIndex', 'ExcelFile', 'ExcelWriter', 'Float64Index', @@ -70,10 +77,7 @@ class TestPDApi(Base): 'period_range', 'pivot', 'pivot_table', 'qcut', 'show_versions', 'timedelta_range', 'unique', - 'value_counts', 'wide_to_long', - 'register_series_accessor', - 'register_dataframe_accessor', - 'register_index_accessor'] + 'value_counts', 'wide_to_long'] # top-level option funcs funcs_option = ['reset_option', 'describe_option', 'get_option', @@ -114,6 +118,7 @@ def test_api(self): self.check(pd, self.lib + self.misc + + self.extensions + self.modules + self.deprecated_modules + self.classes + self.deprecated_classes + self.deprecated_classes_in_future + diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py index f7564aaa8daa9..0c33e09de3318 100644 --- a/pandas/tests/test_register_accessor.py +++ b/pandas/tests/test_register_accessor.py @@ -35,9 +35,9 @@ def method(self): @pytest.mark.parametrize('obj, registrar', [ - 
(pd.Series, pd.register_series_accessor), - (pd.DataFrame, pd.register_dataframe_accessor), - (pd.Index, pd.register_index_accessor) + (pd.Series, pd.extensions.register_series_accessor), + (pd.DataFrame, pd.extensions.register_dataframe_accessor), + (pd.Index, pd.extensions.register_index_accessor) ]) def test_series_register(obj, registrar): with ensure_removed(obj, 'mine'): @@ -50,7 +50,7 @@ def test_series_register(obj, registrar): def test_accessor_works(): with ensure_removed(pd.Series, 'mine'): - pd.register_series_accessor('mine')(MyAccessor) + pd.extensions.register_series_accessor('mine')(MyAccessor) s = pd.Series([1, 2]) assert s.mine.obj is s @@ -64,7 +64,7 @@ def test_overwrite_warns(): mean = pd.Series.mean try: with tm.assert_produces_warning(AccessorRegistrationWarning) as w: - pd.register_series_accessor('mean')(MyAccessor) + pd.extensions.register_series_accessor('mean')(MyAccessor) s = pd.Series([1, 2]) assert s.mean.prop == 'item' msg = str(w[0].message) From 9005e1cb6b417ddeec160cb7914cd4164d7912ea Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 19 Dec 2017 10:02:50 -0600 Subject: [PATCH 04/30] More docs --- doc/source/api.rst | 14 ++++++++++++++ doc/source/internals.rst | 2 +- doc/source/whatsnew/v0.22.0.txt | 2 +- pandas/core/accessor.py | 30 ++++++++++++++++++++++++++++-- 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 3edaadba64762..fcedf59874d83 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2495,6 +2495,20 @@ Scalar introspection api.types.is_re_compilable api.types.is_scalar +Extensions +---------- + +These are primarily intended for library authors looking to extend pandas +objects. + +.. currentmodule:: pandas + +.. autosummary:: + :toctree: generated/ + + extensions.register_dataframe_accessor + extensions.register_series_accessor + extensions.register_index_accessor .. This is to prevent warnings in the doc build. We don't want to encourage .. 
these methods. diff --git a/doc/source/internals.rst b/doc/source/internals.rst index e6d07d148b601..e5a33a5e8c2ae 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -103,7 +103,7 @@ class's `__init__` method gets the object being decorated. For example: .. ipython:: python - @pd.register_dataframe_accessor("geo") + @pd.extensions.register_dataframe_accessor("geo") class GeoAccessor(object): def __init__(self, pandas_obj): self._obj = pandas_obj diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index f1224714634f5..7f0425aeab696 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -132,7 +132,7 @@ supports registering custom accessors in library code. import pandas as pd - @pd.register_dataframe_accessor("geo") + @pd.extensions.register_dataframe_accessor("geo") class GeoAccessor(object): def __init__(self, pandas_obj): self._obj = pandas_obj diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 526825fdedc3b..58f03b70fc6e9 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -199,7 +199,7 @@ def register_dataframe_accessor(name): import pandas as pd - @pd.register_dataframe_accessor("geo") + @pd.extensions.register_dataframe_accessor("geo") class GeoAccessor(object): def __init__(self, pandas_obj): self._obj = pandas_obj @@ -225,18 +225,44 @@ def plot(self): See also -------- - register_series_accessor register_index_accessor + register_series_accessor """ from pandas import DataFrame return _register_accessor(name, DataFrame) def register_series_accessor(name): + """Register a custom accessor on pandas.Series objects. + + Parameters + ---------- + name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. 
+ + See Also + -------- + register_dataframe_accessor + register_index_accessor + """ from pandas import Series return _register_accessor(name, Series) def register_index_accessor(name): + """Register a custom accessor on pandas.Index objects. + + Parameters + ---------- + name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. + + See Also + -------- + register_index_accessor + register_series_accessor + """ from pandas import Index return _register_accessor(name, Index) From 33a9f3f1082debc990e754670288d09751229f39 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 19 Dec 2017 10:33:50 -0600 Subject: [PATCH 05/30] Fix see also --- pandas/core/accessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 58f03b70fc6e9..a9f34ede82e26 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -261,7 +261,7 @@ def register_index_accessor(name): See Also -------- - register_index_accessor + register_dataframe_accessor register_series_accessor """ from pandas import Index From 11edc425620c6439ad8f1a94503e984171215199 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Jan 2018 10:40:57 -0600 Subject: [PATCH 06/30] DOC: Added whatsnew --- doc/source/whatsnew/v0.23.0.txt | 50 +++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index bd3bee507baa3..58d9af2c411b1 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -119,6 +119,56 @@ Current Behavior s.rank(na_option='top') + +Extending Pandas Objects with New Accessors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas uses accessors to group together many related methods into a namespace on +``Series`` or ``Index`` objects. For example, ``Series.str`` for string methods, +or ``Series.dt`` for datetime methods. 
Inspired by xarray, pandas now officially +supports registering custom accessors in library code. + +Once you've implemented an accessor, register it with one or more of the decorators + +* :func:`pandas.extensions.register_dataframe_accessor` +* :func:`pandas.extensions.register_series_accessor` +* :func:`pandas.extensions.register_index_accessor` + +.. code-block:: python + + import pandas as pd + + @pd.extensions.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lon = self._obj.longitude + lat = self._obj.latitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Back in an interactive IPython session: + +.. code-block:: python + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +This provides a convenient alternative to subclassing or composition. +If you write a custom accessor, make a pull request adding it to our +:ref:`ecosystem` page. + ..
_whatsnew_0230.enhancements.other: Other Enhancements From 682bb848ba75b3d83ce6e357a2368ec1aeceeb48 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Jan 2018 10:42:15 -0600 Subject: [PATCH 07/30] Move to api --- pandas/{ => api}/extensions/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pandas/{ => api}/extensions/__init__.py (100%) diff --git a/pandas/extensions/__init__.py b/pandas/api/extensions/__init__.py similarity index 100% rename from pandas/extensions/__init__.py rename to pandas/api/extensions/__init__.py From 964356f37a15533824a4c5478e68575fef7c87ff Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Jan 2018 10:56:30 -0600 Subject: [PATCH 08/30] Update post review --- doc/source/api.rst | 6 +++--- doc/source/internals.rst | 8 ++++---- doc/source/whatsnew/v0.23.0.txt | 6 +++--- pandas/__init__.py | 1 - pandas/api/__init__.py | 1 + pandas/api/extensions.py | 4 ++++ pandas/core/accessor.py | 2 +- pandas/tests/api/test_api.py | 12 ++---------- pandas/tests/test_register_accessor.py | 10 +++++----- 9 files changed, 23 insertions(+), 27 deletions(-) create mode 100644 pandas/api/extensions.py diff --git a/doc/source/api.rst b/doc/source/api.rst index f2dd03d32d711..4132c01292fe4 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2509,9 +2509,9 @@ objects. .. autosummary:: :toctree: generated/ - extensions.register_dataframe_accessor - extensions.register_series_accessor - extensions.register_index_accessor + api.extensions.register_dataframe_accessor + api.extensions.register_series_accessor + api.extensions.register_index_accessor .. This is to prevent warnings in the doc build. We don't want to encourage .. these methods. 
diff --git a/doc/source/internals.rst b/doc/source/internals.rst index 0697f6ec4d588..c1aedc4420acb 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -95,15 +95,15 @@ Registering Custom Accessors ---------------------------- Libraries can use the decorators -:func:`pandas.extensions.register_dataframe_accessor`, -:func:`pandas.extensions.register_series_accessor`, and -:func:`pandas.extensions.register_index_accessor`, to add additional "namespaces" to +:func:`pandas.api.extensions.register_dataframe_accessor`, +:func:`pandas.api.extensions.register_series_accessor`, and +:func:`pandas.api.extensions.register_index_accessor`, to add additional "namespaces" to pandas objects. All of these follow a similar convention: you decorate a class, providing the name of attribute to add. The class's `__init__` method gets the object being decorated. For example: .. ipython:: python - @pd.extensions.register_dataframe_accessor("geo") + @pd.api.extensions.register_dataframe_accessor("geo") class GeoAccessor(object): def __init__(self, pandas_obj): self._obj = pandas_obj diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 58d9af2c411b1..b89a1752cd0cb 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -130,9 +130,9 @@ supports registering custom accessors in library code. Once you've implemented an accessor, register it with one or more of the decorators -* :func:`pandas.extensions.register_dataframe_accessor` -* :func:`pandas.extensions.register_series_accessor` -* :func:`pandas.extensions.register_index_accessor` +* :func:`pandas.api.extensions.register_dataframe_accessor` +* :func:`pandas.api.extensions.register_series_accessor` +* :func:`pandas.api.extensions.register_index_accessor` .. 
code-block:: python diff --git a/pandas/__init__.py b/pandas/__init__.py index f768a4ac8325e..93c5b6484b840 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -57,7 +57,6 @@ from pandas.util._print_versions import show_versions from pandas.io.api import * from pandas.util._tester import test -from pandas.extensions import * import pandas.testing # extension module deprecations diff --git a/pandas/api/__init__.py b/pandas/api/__init__.py index fcbf42f6dabc4..2645a3084c9d1 100644 --- a/pandas/api/__init__.py +++ b/pandas/api/__init__.py @@ -1 +1,2 @@ """ public toolkit API """ +from . import types, extensions diff --git a/pandas/api/extensions.py b/pandas/api/extensions.py new file mode 100644 index 0000000000000..64f5e8fb939a4 --- /dev/null +++ b/pandas/api/extensions.py @@ -0,0 +1,4 @@ +"""Public API for extending pandas objects.""" +from pandas.core.accessor import (register_dataframe_accessor, # noqa + register_index_accessor, + register_series_accessor) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index d454fb7946d84..aeb80ed1b359f 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -8,7 +8,6 @@ import traceback import warnings -from pandas.core.common import AbstractMethodError from pandas.compat import PY2 from pandas.errors import AccessorRegistrationWarning @@ -70,6 +69,7 @@ class PandasDelegate(object): @classmethod def _make_accessor(cls, data): + from pandas.core.common import AbstractMethodError raise AbstractMethodError("_make_accessor should be implemented" "by subclass and return an instance" "of `cls`.") diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 68fc8d67f0486..344f114c69309 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -30,7 +30,7 @@ class TestPDApi(Base): ignored = ['tests', 'locale', 'conftest'] # top-level sub-packages - lib = ['api', 'compat', 'core', 'errors', 'extensions', 'pandas', + lib = ['api', 'compat', 'core', 'errors',
'pandas', 'plotting', 'test', 'testing', 'tools', 'tseries', 'util', 'options', 'io'] @@ -41,13 +41,6 @@ class TestPDApi(Base): # misc misc = ['IndexSlice', 'NaT'] - # extension points - extensions = [ - 'register_dataframe_accessor', - 'register_index_accessor', - 'register_series_accessor', - ] - # top-level classes classes = ['Categorical', 'CategoricalIndex', 'DataFrame', 'DateOffset', 'DatetimeIndex', 'ExcelFile', 'ExcelWriter', 'Float64Index', @@ -118,7 +111,6 @@ def test_api(self): self.check(pd, self.lib + self.misc + - self.extensions + self.modules + self.deprecated_modules + self.classes + self.deprecated_classes + self.deprecated_classes_in_future + @@ -131,7 +123,7 @@ def test_api(self): class TestApi(Base): - allowed = ['types'] + allowed = ['types', 'extensions'] def test_api(self): diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py index 0c33e09de3318..a44cf9d4de393 100644 --- a/pandas/tests/test_register_accessor.py +++ b/pandas/tests/test_register_accessor.py @@ -35,9 +35,9 @@ def method(self): @pytest.mark.parametrize('obj, registrar', [ - (pd.Series, pd.extensions.register_series_accessor), - (pd.DataFrame, pd.extensions.register_dataframe_accessor), - (pd.Index, pd.extensions.register_index_accessor) + (pd.Series, pd.api.extensions.register_series_accessor), + (pd.DataFrame, pd.api.extensions.register_dataframe_accessor), + (pd.Index, pd.api.extensions.register_index_accessor) ]) def test_series_register(obj, registrar): with ensure_removed(obj, 'mine'): @@ -50,7 +50,7 @@ def test_series_register(obj, registrar): def test_accessor_works(): with ensure_removed(pd.Series, 'mine'): - pd.extensions.register_series_accessor('mine')(MyAccessor) + pd.api.extensions.register_series_accessor('mine')(MyAccessor) s = pd.Series([1, 2]) assert s.mine.obj is s @@ -64,7 +64,7 @@ def test_overwrite_warns(): mean = pd.Series.mean try: with tm.assert_produces_warning(AccessorRegistrationWarning) as w: - 
pd.extensions.register_series_accessor('mean')(MyAccessor) + pd.api.extensions.register_series_accessor('mean')(MyAccessor) s = pd.Series([1, 2]) assert s.mean.prop == 'item' msg = str(w[0].message) From ec505e4f1cb6625cb21610f83d92313a344c9031 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Jan 2018 10:57:04 -0600 Subject: [PATCH 09/30] flake8 --- pandas/api/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/api/__init__.py b/pandas/api/__init__.py index 2645a3084c9d1..afff059e7b601 100644 --- a/pandas/api/__init__.py +++ b/pandas/api/__init__.py @@ -1,2 +1,2 @@ """ public toolkit API """ -from . import types, extensions +from . import types, extensions # noqa From e76cecff5132eea055c343e77cf0a4a5e8e69712 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Jan 2018 11:36:56 -0600 Subject: [PATCH 10/30] Raise the underlying error instead of a RuntimeError --- pandas/core/accessor.py | 14 ++------------ pandas/tests/test_register_accessor.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index aeb80ed1b359f..3083941ed0a67 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -150,21 +150,11 @@ def __get__(self, obj, cls): if obj is None: # we're accessing the attribute of the class, i.e., Dataset.geo return self._accessor - try: - accessor_obj = self._accessor(obj) - except AttributeError: - # TODO - # __getattr__ on data object will swallow any AttributeErrors - # raised when initializing the accessor, so we need to raise - # as something else (GH933): - msg = 'error initializing %r accessor.' % self._name - if PY2: - msg += ' Full traceback:\n' + traceback.format_exc() - raise RuntimeError(msg) + accessor_obj = self._accessor(obj) # Replace the property with the accessor object. 
Inspired by: # http://www.pydanny.com/cached-property.html # We need to use object.__setattr__ because we overwrite __setattr__ on - # AttrAccessMixin. + # NDFrame object.__setattr__(obj, self._name, accessor_obj) return accessor_obj diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py index a44cf9d4de393..11da629a2b010 100644 --- a/pandas/tests/test_register_accessor.py +++ b/pandas/tests/test_register_accessor.py @@ -73,3 +73,16 @@ def test_overwrite_warns(): assert 'Series' in msg finally: pd.Series.mean = mean + + +def test_raises_attribute_error(): + + with ensure_removed(pd.Series, 'bad'): + + @pd.api.extensions.register_series_accessor("bad") + class Bad(object): + def __init__(self, data): + raise AttributeError("whoops") + + with tm.assert_raises_regex(AttributeError, "whoops"): + pd.Series([]).bad From 19e9fa048fb6920a4bafb5c384ef45bbd63dabc0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 07:10:08 -0600 Subject: [PATCH 11/30] str validate --- pandas/core/register_accessors.py | 102 ++++++++++++++++++++++++++++++ pandas/core/strings.py | 61 ++++++++++-------- 2 files changed, 135 insertions(+), 28 deletions(-) create mode 100644 pandas/core/register_accessors.py diff --git a/pandas/core/register_accessors.py b/pandas/core/register_accessors.py new file mode 100644 index 0000000000000..f867e6588d99e --- /dev/null +++ b/pandas/core/register_accessors.py @@ -0,0 +1,102 @@ +from pandas.api.types import is_categorical_dtype +from pandas.core import strings +from pandas.core.accessor import (PandasDelegate, register_index_accessor, + register_series_accessor) +from pandas.core.base import NoNewAttributesMixin, PandasObject +from pandas.core.categorical import Categorical +from pandas.core.indexes.accessors import CombinedDatetimelikeProperties + + +@register_series_accessor("cat") +class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): + """ + Accessor object for categorical 
properties of the Series values. + + Be aware that assigning to `categories` is a inplace operation, while all + methods return new categorical data per default (but can be called with + `inplace=True`). + + Examples + -------- + >>> s.cat.categories + >>> s.cat.categories = list('abc') + >>> s.cat.rename_categories(list('cab')) + >>> s.cat.reorder_categories(list('cab')) + >>> s.cat.add_categories(['d','e']) + >>> s.cat.remove_categories(['d']) + >>> s.cat.remove_unused_categories() + >>> s.cat.set_categories(list('abcde')) + >>> s.cat.as_ordered() + >>> s.cat.as_unordered() + + """ + + def __init__(self, data): + self._validate_dtype(data) + self.categorical = data.values + self.index = data.index + self.name = data.name + self._freeze() + + @staticmethod + def _validate_dtype(data): + if not is_categorical_dtype(data): + msg = "Can only use '.cat' accessor with 'category' dtype" + raise AttributeError(msg) + + def _delegate_property_get(self, name): + return getattr(self.categorical, name) + + def _delegate_property_set(self, name, new_values): + return setattr(self.categorical, name, new_values) + + @property + def codes(self): + from pandas import Series + return Series(self.categorical.codes, index=self.index) + + def _delegate_method(self, name, *args, **kwargs): + from pandas import Series + method = getattr(self.categorical, name) + res = method(*args, **kwargs) + if res is not None: + return Series(res, index=self.index, name=self.name) + + @classmethod + def _make_accessor(cls, data): + cls._validate_dtype(data) + return CategoricalAccessor(data.values, data.index, + getattr(data, 'name', None),) + + +CategoricalAccessor._add_delegate_accessors(delegate=Categorical, + accessors=["categories", + "ordered"], + typ='property') +CategoricalAccessor._add_delegate_accessors(delegate=Categorical, accessors=[ + "rename_categories", "reorder_categories", "add_categories", + "remove_categories", "remove_unused_categories", "set_categories", + "as_ordered", 
"as_unordered"], typ='method') + + +# --- +# str +# --- + +@register_index_accessor("str") +@register_series_accessor("str") +class StringAccessor(strings.StringMethods): + pass + + +# -- +# dt +# -- + +@register_index_accessor("dt") +@register_series_accessor("dt") +class DatetimeAccessor(CombinedDatetimelikeProperties): + pass + + +__all__ = [] diff --git a/pandas/core/strings.py b/pandas/core/strings.py index fab4e77ce4467..e844867372faa 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1371,12 +1371,44 @@ class StringMethods(NoNewAttributesMixin): """ def __init__(self, data): + self._validate(data) self._is_categorical = is_categorical_dtype(data) self._data = data.cat.categories if self._is_categorical else data # save orig to blow up categoricals to the right type self._orig = data self._freeze() + @staticmethod + def _validate(data): + from pandas.core.index import Index + + if (isinstance(data, ABCSeries) and + not ((is_categorical_dtype(data.dtype) and + is_object_dtype(data.values.categories)) or + (is_object_dtype(data.dtype)))): + # it's neither a string series not a categorical series with + # strings inside the categories. + # this really should exclude all series with any non-string values + # (instead of test for object dtype), but that isn't practical for + # performance reasons until we have a str dtype (GH 9343) + raise AttributeError("Can only use .str accessor with string " + "values, which use np.object_ dtype in " + "pandas") + elif isinstance(data, Index): + # can't use ABCIndex to exclude non-str + + # see scc/inferrence.pyx which can contain string values + allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') + if data.inferred_type not in allowed_types: + message = ("Can only use .str accessor with string values " + "(i.e. 
inferred_type is 'string', 'unicode' or " + "'mixed')") + raise AttributeError(message) + if data.nlevels > 1: + message = ("Can only use .str accessor with Index, not " + "MultiIndex") + raise AttributeError(message) + def __getitem__(self, key): if isinstance(key, slice): return self.slice(start=key.start, stop=key.stop, step=key.step) @@ -1896,32 +1928,5 @@ def rindex(self, sub, start=0, end=None): @classmethod def _make_accessor(cls, data): - from pandas.core.index import Index - - if (isinstance(data, ABCSeries) and - not ((is_categorical_dtype(data.dtype) and - is_object_dtype(data.values.categories)) or - (is_object_dtype(data.dtype)))): - # it's neither a string series not a categorical series with - # strings inside the categories. - # this really should exclude all series with any non-string values - # (instead of test for object dtype), but that isn't practical for - # performance reasons until we have a str dtype (GH 9343) - raise AttributeError("Can only use .str accessor with string " - "values, which use np.object_ dtype in " - "pandas") - elif isinstance(data, Index): - # can't use ABCIndex to exclude non-str - - # see scc/inferrence.pyx which can contain string values - allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') - if data.inferred_type not in allowed_types: - message = ("Can only use .str accessor with string values " - "(i.e. 
inferred_type is 'string', 'unicode' or " - "'mixed')") - raise AttributeError(message) - if data.nlevels > 1: - message = ("Can only use .str accessor with Index, not " - "MultiIndex") - raise AttributeError(message) + cls._validate(data) return cls(data) From c1c498c4e9ed97d48ba773906fabcf7c1efba855 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 09:13:32 -0600 Subject: [PATCH 12/30] DOC: Moved to developer --- doc/source/developer.rst | 42 ++++++++++++++++++++++++++++++++++++ doc/source/internals.rst | 46 +++++++++------------------------------- 2 files changed, 52 insertions(+), 36 deletions(-) diff --git a/doc/source/developer.rst b/doc/source/developer.rst index b8bb2b2fcbe2f..e9203bb6f464a 100644 --- a/doc/source/developer.rst +++ b/doc/source/developer.rst @@ -140,3 +140,45 @@ As an example of fully-formed metadata: 'metadata': None} ], 'pandas_version': '0.20.0'} + +.. _register-accessors: + +Registering Custom Accessors +---------------------------- + +Libraries can use the decorators +:func:`pandas.api.extensions.register_dataframe_accessor`, +:func:`pandas.api.extensions.register_series_accessor`, and +:func:`pandas.api.extensions.register_index_accessor`, to add additional "namespaces" to +pandas objects. All of these follow a similar convention: you decorate a class, providing the name of attribute to add. The +class's `__init__` method gets the object being decorated. For example: + +.. 
ipython:: python + + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lon = self._obj.longitude + lat = self._obj.latitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Now users can access your methods using the `geo` namespace: + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +This can be a convenient way to extend pandas objects without subclassing them. + diff --git a/doc/source/internals.rst b/doc/source/internals.rst index c1aedc4420acb..48bb727f83efb 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -89,46 +89,20 @@ not check (or care) whether the levels themselves are sorted. Fortunately, the constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but if you compute the levels and labels yourself, please be careful. -.. _register-accessors: +Accessors +--------- -Registering Custom Accessors ----------------------------- +We use accessors (like ``Series.str``, ``CategoricalIndex.cat``, etc) to provide +namespaces for related methods on certain objects. -Libraries can use the decorators -:func:`pandas.api.extensions.register_dataframe_accessor`, -:func:`pandas.api.extensions.register_series_accessor`, and -:func:`pandas.api.extensions.register_index_accessor`, to add additional "namespaces" to -pandas objects. All of these follow a similar convention: you decorate a class, providing the name of attribute to add. The -class's `__init__` method gets the object being decorated. For example: +Accessors are registered using the public accessor registration methods -..
ipython:: python - - @pd.api.extensions.register_dataframe_accessor("geo") - class GeoAccessor(object): - def __init__(self, pandas_obj): - self._obj = pandas_obj +* :func:`pandas.api.extensions.register_dataframe_accessor` +* :func:`pandas.api.extensions.register_series_accessor` +* :func:`pandas.api.extensions.register_index_accessor` - @property - def center(self): - # return the geographic center point of this DataFarme - lon = self._obj.latitude - lat = self._obj.longitude - return (float(lon.mean()), float(lat.mean())) - - def plot(self): - # plot this array's data on a map, e.g., using Cartopy - pass - -Now users can access your methods using the `geo` namespace: - - >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), - ... 'latitude': np.linspace(0, 20)}) - >>> ds.geo.center - (5.0, 10.0) - >>> ds.geo.plot() - # plots data on a map - -This can be a convenient way to extend pandas objects without subclassing them. +Our accessors should inherit from ``NoNewAttributesMixin`` and call +``self._freeze`` at the end of initialization. .. 
_ref-subclassing-pandas: From ecc1cd73b97a7584ba35c3084a6830a6747f60c5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 09:15:03 -0600 Subject: [PATCH 13/30] REF: Use public registrars for accessors --- pandas/__init__.py | 3 ++ pandas/core/accessor.py | 2 - pandas/core/categorical.py | 12 +++-- pandas/core/indexes/accessors.py | 15 +----- pandas/core/indexes/base.py | 3 -- pandas/core/register_accessors.py | 82 ++----------------------------- pandas/core/series.py | 6 +-- pandas/tests/test_strings.py | 5 +- 8 files changed, 24 insertions(+), 104 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 93c5b6484b840..6d2ce2fea033f 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -46,6 +46,9 @@ from pandas.core.computation.api import * from pandas.core.reshape.api import * +# register accessors +from pandas.core.register_accessors import * + # deprecate tools.plotting, plot_params and scatter_matrix on the top namespace import pandas.tools.plotting plot_params = pandas.plotting._style._Options(deprecated=True) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 3083941ed0a67..6dfe1d4b9a189 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -5,10 +5,8 @@ that can be mixed into or pinned onto other pandas classes. """ -import traceback import warnings -from pandas.compat import PY2 from pandas.errors import AccessorRegistrationWarning diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index d47cb0762447b..c0d254199c348 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2146,6 +2146,10 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): methods return new categorical data per default (but can be called with `inplace=True`). 
+ Parameters + ---------- + data : Series or CategoricalIndex + Examples -------- >>> s.cat.categories @@ -2161,10 +2165,10 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): """ - def __init__(self, values, index, name): - self.categorical = values - self.index = index - self.name = name + def __init__(self, data): + self.categorical = data.values + self.index = data.index + self.name = data.name self._freeze() def _delegate_property_get(self, name): diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 116c7eb8c7958..d3d9be8f99c01 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -244,16 +244,5 @@ class PeriodProperties(Properties): typ='method') -class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): - # This class is never instantiated, and exists solely for the benefit of - # the Series.dt class property. For Series objects, .dt will always be one - # of the more specific classes above. 
- __doc__ = DatetimeProperties.__doc__ - - @classmethod - def _make_accessor(cls, data): - try: - return maybe_to_datetimelike(data) - except Exception: - raise AttributeError("Can only use .dt accessor with " - "datetimelike values") +def CombinedDatetimelikeProperties(data): + return maybe_to_datetimelike(data) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 52c4a1ad9865a..9a49c817418d7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -173,9 +173,6 @@ class Index(IndexOpsMixin, PandasObject): _accessors = frozenset(['str']) - # String Methods - str = accessor.AccessorProperty(strings.StringMethods) - def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): diff --git a/pandas/core/register_accessors.py b/pandas/core/register_accessors.py index f867e6588d99e..1405e801fa7fb 100644 --- a/pandas/core/register_accessors.py +++ b/pandas/core/register_accessors.py @@ -1,82 +1,13 @@ -from pandas.api.types import is_categorical_dtype from pandas.core import strings -from pandas.core.accessor import (PandasDelegate, register_index_accessor, +from pandas.core.accessor import (register_index_accessor, register_series_accessor) -from pandas.core.base import NoNewAttributesMixin, PandasObject -from pandas.core.categorical import Categorical +import pandas.core.categorical from pandas.core.indexes.accessors import CombinedDatetimelikeProperties @register_series_accessor("cat") -class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): - """ - Accessor object for categorical properties of the Series values. - - Be aware that assigning to `categories` is a inplace operation, while all - methods return new categorical data per default (but can be called with - `inplace=True`). 
- - Examples - -------- - >>> s.cat.categories - >>> s.cat.categories = list('abc') - >>> s.cat.rename_categories(list('cab')) - >>> s.cat.reorder_categories(list('cab')) - >>> s.cat.add_categories(['d','e']) - >>> s.cat.remove_categories(['d']) - >>> s.cat.remove_unused_categories() - >>> s.cat.set_categories(list('abcde')) - >>> s.cat.as_ordered() - >>> s.cat.as_unordered() - - """ - - def __init__(self, data): - self._validate_dtype(data) - self.categorical = data.values - self.index = data.index - self.name = data.name - self._freeze() - - @staticmethod - def _validate_dtype(data): - if not is_categorical_dtype(data): - msg = "Can only use '.cat' accessor with 'category' dtype" - raise AttributeError(msg) - - def _delegate_property_get(self, name): - return getattr(self.categorical, name) - - def _delegate_property_set(self, name, new_values): - return setattr(self.categorical, name, new_values) - - @property - def codes(self): - from pandas import Series - return Series(self.categorical.codes, index=self.index) - - def _delegate_method(self, name, *args, **kwargs): - from pandas import Series - method = getattr(self.categorical, name) - res = method(*args, **kwargs) - if res is not None: - return Series(res, index=self.index, name=self.name) - - @classmethod - def _make_accessor(cls, data): - cls._validate_dtype(data) - return CategoricalAccessor(data.values, data.index, - getattr(data, 'name', None),) - - -CategoricalAccessor._add_delegate_accessors(delegate=Categorical, - accessors=["categories", - "ordered"], - typ='property') -CategoricalAccessor._add_delegate_accessors(delegate=Categorical, accessors=[ - "rename_categories", "reorder_categories", "add_categories", - "remove_categories", "remove_unused_categories", "set_categories", - "as_ordered", "as_unordered"], typ='method') +class CategoricalAccessor(pandas.core.categorical.CategoricalAccessor): + pass # --- @@ -93,10 +24,7 @@ class StringAccessor(strings.StringMethods): # dt # -- 
-@register_index_accessor("dt") -@register_series_accessor("dt") -class DatetimeAccessor(CombinedDatetimelikeProperties): - pass +register_series_accessor("dt")(CombinedDatetimelikeProperties) __all__ = [] diff --git a/pandas/core/series.py b/pandas/core/series.py index 5d8092fd30496..0b7a8440593c4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3061,14 +3061,14 @@ def to_period(self, freq=None, copy=True): # ------------------------------------------------------------------------- # Datetimelike delegation methods - dt = accessor.AccessorProperty(CombinedDatetimelikeProperties) + # dt = accessor.AccessorProperty(CombinedDatetimelikeProperties) # ------------------------------------------------------------------------- # Categorical methods - cat = accessor.AccessorProperty(CategoricalAccessor) + # cat = accessor.AccessorProperty(CategoricalAccessor) # String Methods - str = accessor.AccessorProperty(strings.StringMethods) + # str = accessor.AccessorProperty(strings.StringMethods) # ---------------------------------------------------------------------- # Add plotting methods to Series diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 8aa69bcbfdf7f..e0759eb32e383 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -24,8 +24,9 @@ class TestStringMethods(object): def test_api(self): # GH 6106, GH 9322 - assert Series.str is strings.StringMethods - assert isinstance(Series(['']).str, strings.StringMethods) + from pandas.core.register_accessors import StringAccessor + assert Series.str is StringAccessor + assert isinstance(Series(['']).str, StringAccessor) # GH 9184 invalid = Series([1]) From 2ad2fa0d366f1f14da669d14356b43a961f0149f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 11:46:51 -0600 Subject: [PATCH 14/30] cleanup --- pandas/core/series.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 
d16616801252b..dc3a0cbe7fb81 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -52,8 +52,7 @@ from pandas.core.indexing import check_bool_indexer, maybe_convert_indices from pandas.core import generic, base from pandas.core.internals import SingleBlockManager -from pandas.core.categorical import Categorical, CategoricalAccessor -import pandas.core.strings as strings +from pandas.core.categorical import Categorical from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex From c2a94e81c73d3eaf2c5305576a8f5701e4baaa03 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 12:41:42 -0600 Subject: [PATCH 15/30] Implemented optional caching --- pandas/core/accessor.py | 22 ++++++++++++---------- pandas/core/indexes/accessors.py | 22 +++++++++------------- pandas/core/register_accessors.py | 8 ++++++++ pandas/core/series.py | 11 ----------- 4 files changed, 29 insertions(+), 34 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 6dfe1d4b9a189..c3467c9282d02 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -140,9 +140,10 @@ def f(self, *args, **kwargs): class _CachedAccessor(object): """Custom property-like object (descriptor) for caching accessors.""" - def __init__(self, name, accessor): + def __init__(self, name, accessor, cache=True): self._name = name self._accessor = accessor + self._cache = cache def __get__(self, obj, cls): if obj is None: @@ -153,11 +154,12 @@ def __get__(self, obj, cls): # http://www.pydanny.com/cached-property.html # We need to use object.__setattr__ because we overwrite __setattr__ on # NDFrame - object.__setattr__(obj, self._name, accessor_obj) + if self._cache: + object.__setattr__(obj, self._name, accessor_obj) return accessor_obj -def _register_accessor(name, cls): +def _register_accessor(name, cls, cache=True): def 
decorator(accessor): if hasattr(cls, name): warnings.warn( @@ -166,12 +168,12 @@ def decorator(accessor): 'name.'.format(accessor, name, cls), AccessorRegistrationWarning, stacklevel=2) - setattr(cls, name, _CachedAccessor(name, accessor)) + setattr(cls, name, _CachedAccessor(name, accessor, cache=cache)) return accessor return decorator -def register_dataframe_accessor(name): +def register_dataframe_accessor(name, cache=True): """Register a custom accessor on pandas.DataFrame objects. Parameters @@ -217,10 +219,10 @@ def plot(self): register_series_accessor """ from pandas import DataFrame - return _register_accessor(name, DataFrame) + return _register_accessor(name, DataFrame, cache=cache) -def register_series_accessor(name): +def register_series_accessor(name, cache=True): """Register a custom accessor on pandas.Series objects. Parameters @@ -235,10 +237,10 @@ def register_series_accessor(name): register_index_accessor """ from pandas import Series - return _register_accessor(name, Series) + return _register_accessor(name, Series, cache=cache) -def register_index_accessor(name): +def register_index_accessor(name, cache=True): """Register a custom accessor on pandas.Index objects. Parameters @@ -253,4 +255,4 @@ def register_index_accessor(name): register_series_accessor """ from pandas import Index - return _register_accessor(name, Index) + return _register_accessor(name, Index, cache=cache) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 116c7eb8c7958..6b59b2ba31d6a 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -244,16 +244,12 @@ class PeriodProperties(Properties): typ='method') -class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): - # This class is never instantiated, and exists solely for the benefit of - # the Series.dt class property. For Series objects, .dt will always be one - # of the more specific classes above. 
- __doc__ = DatetimeProperties.__doc__ - - @classmethod - def _make_accessor(cls, data): - try: - return maybe_to_datetimelike(data) - except Exception: - raise AttributeError("Can only use .dt accessor with " - "datetimelike values") +def CombinedDatetimelikeProperties(data): + try: + return maybe_to_datetimelike(data) + except Exception: + raise AttributeError("Can only use .dt accessor with " + "datetimelike values") + + +CombinedDatetimelikeProperties.__doc__ = DatetimeProperties.__doc__ diff --git a/pandas/core/register_accessors.py b/pandas/core/register_accessors.py index c19abac929e82..5b1ee2b045161 100644 --- a/pandas/core/register_accessors.py +++ b/pandas/core/register_accessors.py @@ -2,6 +2,7 @@ from pandas.core.accessor import (register_index_accessor, register_series_accessor) import pandas.core.categorical +from pandas.core.indexes.accessors import CombinedDatetimelikeProperties @register_series_accessor("cat") @@ -19,4 +20,11 @@ class StringAccessor(strings.StringMethods): pass +# -- +# dt +# -- + +register_series_accessor("dt", cache=False)(CombinedDatetimelikeProperties) + + __all__ = [] diff --git a/pandas/core/series.py b/pandas/core/series.py index dc3a0cbe7fb81..3c1efe76b4e0b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3058,17 +3058,6 @@ def to_period(self, freq=None, copy=True): return self._constructor(new_values, index=new_index).__finalize__(self) - # ------------------------------------------------------------------------- - # Datetimelike delegation methods - dt = accessor.AccessorProperty(CombinedDatetimelikeProperties) - - # ------------------------------------------------------------------------- - # Categorical methods - # cat = accessor.AccessorProperty(CategoricalAccessor) - - # String Methods - # str = accessor.AccessorProperty(strings.StringMethods) - # ---------------------------------------------------------------------- # Add plotting methods to Series plot = 
accessor.AccessorProperty(gfx.SeriesPlotMethods, From d910a0fcf46025e635ca7571ba5b7c0919208765 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 12:49:28 -0600 Subject: [PATCH 16/30] Document cache --- pandas/core/accessor.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index c3467c9282d02..2a237dfaad550 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -138,8 +138,23 @@ def f(self, *args, **kwargs): # https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py -class _CachedAccessor(object): - """Custom property-like object (descriptor) for caching accessors.""" +class _CachableAccessor(object): + """Custom property-like object (descriptor) for caching accessors. + + Parameters + ---------- + name : str + The namespace this will be accessed under, e.g. ``df.foo`` + accessor : cls + The class with the extension methods. The class' __init__ method + should expect one of a ``Series``, ``DataFrame`` or ``Index`` as + the single argument ``data`` + cache : bool, default True + Whether to cache the accessor on an instance, such that ``df.foo`` + will be the same object every time. Set this to ``False`` if the + object you are extending is mutable (``Series``, ``DataFrame``) and + if your accessor caches anything based on its ``data`` argument. + """ def __init__(self, name, accessor, cache=True): self._name = name self._accessor = accessor @@ -168,7 +183,7 @@ def decorator(accessor): 'name.'.format(accessor, name, cls), AccessorRegistrationWarning, stacklevel=2) - setattr(cls, name, _CachedAccessor(name, accessor, cache=cache)) + setattr(cls, name, _CachableAccessor(name, accessor, cache=cache)) return accessor return decorator @@ -181,6 +196,11 @@ def register_dataframe_accessor(name, cache=True): name : str Name under which the accessor should be registered. 
A warning is issued if this name conflicts with a preexisting attribute. + cache : bool, default True + Whether to cache the accessor such that ``df.`` is always the + same object for a given ``DataFrame``. Set this to ``False`` if the + object you are extending is mutable (``Series``, ``DataFrame``) and + if your accessor caches anything based on its ``data`` argument. Examples -------- From 8bcd412e74bf1da1a41e91d9a9444e9396714c6c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 13:28:05 -0600 Subject: [PATCH 17/30] Tests passing --- pandas/core/base.py | 12 ++++++++---- pandas/core/categorical.py | 7 +++++++ pandas/tests/series/test_api.py | 3 ++- pandas/tests/test_register_accessor.py | 16 ++++++++++++++++ 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index e90794c6c2e1a..4b3e74eae36b8 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -145,10 +145,14 @@ def _freeze(self): # prevent adding any attribute via s.xxx.new_attribute = ... def __setattr__(self, key, value): # _cache is used by a decorator - # dict lookup instead of getattr as getattr is false for getter - # which error - if getattr(self, "__frozen", False) and not \ - (key in type(self).__dict__ or key == "_cache"): + # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key) + # because + # 1.) getattr is false for attributes that raise errors + # 2.) cls.__dict__ doesn't traverse into base classes + if (getattr(self, "__frozen", False) and not + (key == "_cache" or + key in type(self).__dict__ or + getattr(self, key, None) is not None)): raise AttributeError("You cannot add any new attribute '{key}'". 
format(key=key)) object.__setattr__(self, key, value) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index c0d254199c348..6ec3345571fdd 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2166,11 +2166,18 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): """ def __init__(self, data): + self._validate(data) self.categorical = data.values self.index = data.index self.name = data.name self._freeze() + @staticmethod + def _validate(data): + if not is_categorical_dtype(data.dtype): + raise AttributeError("Can only use .cat accessor with a " + "'category' dtype") + def _delegate_property_get(self, name): return getattr(self.categorical, name) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 8ae7feab451f9..29bed3ecdd79a 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -511,7 +511,8 @@ def test_cat_accessor(self): def test_cat_accessor_api(self): # GH 9322 - from pandas.core.categorical import CategoricalAccessor + from pandas.core.register_accessors import CategoricalAccessor + assert Series.cat is CategoricalAccessor s = Series(list('aabbcde')).astype('category') assert isinstance(s.cat, CategoricalAccessor) diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py index 11da629a2b010..bae36d90f1e8d 100644 --- a/pandas/tests/test_register_accessor.py +++ b/pandas/tests/test_register_accessor.py @@ -86,3 +86,19 @@ def __init__(self, data): with tm.assert_raises_regex(AttributeError, "whoops"): pd.Series([]).bad + + +@pytest.mark.parametrize('obj, registrar', [ + (pd.Series, pd.api.extensions.register_series_accessor), + (pd.DataFrame, pd.api.extensions.register_dataframe_accessor), + (pd.Index, pd.api.extensions.register_index_accessor) +]) +@pytest.mark.parametrize("cache", [True, False]) +def test_cache_works(obj, registrar, cache): + with ensure_removed(obj, "mine"): + 
registrar("mine", cache=cache)(MyAccessor) + + ser = obj([]) + a = ser.mine + b = ser.mine + assert (a is b) == cache From ded3513e500ad35f37ee3cd4406069e2b941f124 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 13:50:07 -0600 Subject: [PATCH 18/30] Use for plot --- doc/source/developer.rst | 1 - pandas/core/accessor.py | 44 ++++++++++--------------------- pandas/core/categorical.py | 8 ------ pandas/core/frame.py | 3 --- pandas/core/indexes/base.py | 1 - pandas/core/register_accessors.py | 11 +++++++- pandas/core/series.py | 4 --- 7 files changed, 24 insertions(+), 48 deletions(-) diff --git a/doc/source/developer.rst b/doc/source/developer.rst index e9203bb6f464a..b9bf32ca99599 100644 --- a/doc/source/developer.rst +++ b/doc/source/developer.rst @@ -181,4 +181,3 @@ Now users can access your methods using the `geo` namespace: # plots data on a map This can be a convenient way to extend pandas objects without subclassing them. - diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 2a237dfaad550..0e2da1064c4c1 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -39,39 +39,9 @@ def __dir__(self): return sorted(rv) -class AccessorProperty(object): - """Descriptor for implementing accessor properties like Series.str - """ - - def __init__(self, accessor_cls, construct_accessor=None): - self.accessor_cls = accessor_cls - self.construct_accessor = (construct_accessor or - accessor_cls._make_accessor) - self.__doc__ = accessor_cls.__doc__ - - def __get__(self, instance, owner=None): - if instance is None: - # this ensures that Series.str. 
is well defined - return self.accessor_cls - return self.construct_accessor(instance) - - def __set__(self, instance, value): - raise AttributeError("can't set attribute") - - def __delete__(self, instance): - raise AttributeError("can't delete attribute") - - class PandasDelegate(object): """ an abstract base class for delegating methods/properties """ - @classmethod - def _make_accessor(cls, data): - from pandas.core.common import AbstractMethodError - raise AbstractMethodError("_make_accessor should be implemented" - "by subclass and return an instance" - "of `cls`.") - def _delegate_property_get(self, name, *args, **kwargs): raise TypeError("You cannot access the " "property {name}".format(name=name)) @@ -136,6 +106,8 @@ def f(self, *args, **kwargs): # Ported with modifications from xarray # https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py +# 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors +# 2. We made caching configurable class _CachableAccessor(object): @@ -250,6 +222,12 @@ def register_series_accessor(name, cache=True): name : str Name under which the accessor should be registered. A warning is issued if this name conflicts with a preexisting attribute. + cache : bool, default True + Whether to cache the accessor such that ``series.`` is always the + same object for a given ``Series``. Set this to ``False`` if the + object you are extending is mutable (``Series``, ``DataFrame``) and + if your accessor caches anything based on its ``data`` argument. + See Also -------- @@ -268,6 +246,12 @@ def register_index_accessor(name, cache=True): name : str Name under which the accessor should be registered. A warning is issued if this name conflicts with a preexisting attribute. + cache : bool, default True + Whether to cache the accessor such that ``df.`` is always the + same object for a given ``DataFrame``. 
Set this to ``False`` if the + object you are extending is mutable (``Series``, ``DataFrame``) and + if your accessor caches anything based on its ``data`` argument. + See Also -------- diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 6ec3345571fdd..c83662af50eec 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2196,14 +2196,6 @@ def _delegate_method(self, name, *args, **kwargs): if res is not None: return Series(res, index=self.index, name=self.name) - @classmethod - def _make_accessor(cls, data): - if not is_categorical_dtype(data.dtype): - raise AttributeError("Can only use .cat accessor with a " - "'category' dtype") - return CategoricalAccessor(data.values, data.index, - getattr(data, 'name', None),) - CategoricalAccessor._add_delegate_accessors(delegate=Categorical, accessors=["categories", diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9acc82b50aabf..49a01be5d25f7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -92,7 +92,6 @@ from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.core import accessor import pandas.core.common as com import pandas.core.nanops as nanops import pandas.core.ops as ops @@ -6006,8 +6005,6 @@ def isin(self, values): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame - plot = accessor.AccessorProperty(gfx.FramePlotMethods, - gfx.FramePlotMethods) hist = gfx.hist_frame boxplot = gfx.boxplot_frame diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c307b5c38d065..b5ad24427621a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -55,7 +55,6 @@ import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY -from pandas.core import strings, accessor from pandas.core.config import get_option diff 
--git a/pandas/core/register_accessors.py b/pandas/core/register_accessors.py index 5b1ee2b045161..498073db87fd9 100644 --- a/pandas/core/register_accessors.py +++ b/pandas/core/register_accessors.py @@ -1,8 +1,10 @@ from pandas.core import strings -from pandas.core.accessor import (register_index_accessor, +from pandas.core.accessor import (register_dataframe_accessor, + register_index_accessor, register_series_accessor) import pandas.core.categorical from pandas.core.indexes.accessors import CombinedDatetimelikeProperties +from pandas.plotting._core import SeriesPlotMethods, FramePlotMethods @register_series_accessor("cat") @@ -26,5 +28,12 @@ class StringAccessor(strings.StringMethods): register_series_accessor("dt", cache=False)(CombinedDatetimelikeProperties) +# ---- +# plot +# ---- + +# TODO: see if this triggers the actual mpl import... +register_series_accessor("plot")(SeriesPlotMethods) +register_dataframe_accessor("plot")(FramePlotMethods) __all__ = [] diff --git a/pandas/core/series.py b/pandas/core/series.py index 3c1efe76b4e0b..13d7323cd739b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -53,7 +53,6 @@ from pandas.core import generic, base from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical -from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexes.period import PeriodIndex @@ -63,7 +62,6 @@ zip, u, OrderedDict, StringIO, range, get_range_parameters) from pandas.compat.numpy import function as nv -from pandas.core import accessor import pandas.core.ops as ops import pandas.core.algorithms as algorithms @@ -3060,8 +3058,6 @@ def to_period(self, freq=None, copy=True): # ---------------------------------------------------------------------- # Add plotting methods to Series - plot = accessor.AccessorProperty(gfx.SeriesPlotMethods, - 
gfx.SeriesPlotMethods) hist = gfx.hist_series From 632f0972bd872cb613c29e312490046087425ed4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 14:04:37 -0600 Subject: [PATCH 19/30] Fix autodoc --- doc/source/internals.rst | 10 ++++++++-- pandas/core/indexes/accessors.py | 16 ++++++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/doc/source/internals.rst b/doc/source/internals.rst index 48bb727f83efb..62058941fc725 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -95,14 +95,20 @@ Accessors We use accessors (like ``Series.str``, ``CategoricalIndex.cat``, etc) to provide namespaces for related methods on certain objects. +Our accessors should inherit from ``pands.core.base.NoNewAttributesMixin`` and +call ``self._freeze`` at the end of initialization. Additionally, we can use +``pandas.core.accessor.PandasDelegate`` to implement many attributes on the +accessor that simply pass the call through to another object, (e.g. +``series.cat.categories`` delegates to ``Categorical.categories``). + Accessors are registered using the public accessor registration methods * :func:`pandas.api.extensions.register_dataframe_accessor` * :func:`pandas.api.extensions.register_series_accessor` * :func:`pandas.api.extensions.register_index_accessor` -Our accessors should inherit from ``NoNewAttributesMixin`` and call -``self._freeze`` at the end of initialization. +These methods import ``Series``, ``DataFrame``, etc., so the registration +has to happen towards the end of ``pandas.__init__``. .. 
_ref-subclassing-pandas: diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 6b59b2ba31d6a..1a286aadf8df6 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -244,12 +244,16 @@ class PeriodProperties(Properties): typ='method') -def CombinedDatetimelikeProperties(data): - try: - return maybe_to_datetimelike(data) - except Exception: - raise AttributeError("Can only use .dt accessor with " - "datetimelike values") +class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): + # use a class instead of a function so that the methods are inherited, + # making this easier to autodoc + + def __call__(self, data): + try: + return maybe_to_datetimelike(data) + except Exception: + raise AttributeError("Can only use .dt accessor with " + "datetimelike values") CombinedDatetimelikeProperties.__doc__ = DatetimeProperties.__doc__ From 5dc4d0599dda43498cd816daed2a3918899dfad0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 14:24:28 -0600 Subject: [PATCH 20/30] Fix the class instantiation --- pandas/core/indexes/accessors.py | 105 ++++++++++--------------------- 1 file changed, 34 insertions(+), 71 deletions(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 1a286aadf8df6..8bc6595b3489d 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -20,68 +20,6 @@ from pandas.core.algorithms import take_1d -def is_datetimelike(data): - """ - return a boolean if we can be successfully converted to a datetimelike - """ - try: - maybe_to_datetimelike(data) - return True - except (Exception): - pass - return False - - -def maybe_to_datetimelike(data, copy=False): - """ - return a DelegatedClass of a Series that is datetimelike - (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods) - raise TypeError if this is not possible. 
- - Parameters - ---------- - data : Series - copy : boolean, default False - copy the input data - - Returns - ------- - DelegatedClass - - """ - from pandas import Series - - if not isinstance(data, Series): - raise TypeError("cannot convert an object of type {0} to a " - "datetimelike index".format(type(data))) - - index = data.index - name = data.name - orig = data if is_categorical_dtype(data) else None - if orig is not None: - data = orig.values.categories - - if is_datetime64_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy), - index, name=name, orig=orig) - elif is_datetime64tz_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy), - index, data.name, orig=orig) - elif is_timedelta64_dtype(data.dtype): - return TimedeltaProperties(TimedeltaIndex(data, copy=copy), index, - name=name, orig=orig) - else: - if is_period_arraylike(data): - return PeriodProperties(PeriodIndex(data, copy=copy), index, - name=name, orig=orig) - if is_datetime_arraylike(data): - return DatetimeProperties(DatetimeIndex(data, copy=copy), index, - name=name, orig=orig) - - raise TypeError("cannot convert an object of type {0} to a " - "datetimelike index".format(type(data))) - - class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): def __init__(self, values, index, name, orig=None): @@ -245,15 +183,40 @@ class PeriodProperties(Properties): class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): - # use a class instead of a function so that the methods are inherited, - # making this easier to autodoc - - def __call__(self, data): - try: - return maybe_to_datetimelike(data) - except Exception: - raise AttributeError("Can only use .dt accessor with " - "datetimelike values") + + def __new__(cls, data): + from pandas import Series + + if not isinstance(data, Series): + raise TypeError("cannot convert an object of type {0} to a " + "datetimelike index".format(type(data))) + + index = data.index + name 
= data.name + orig = data if is_categorical_dtype(data) else None + if orig is not None: + data = orig.values.categories + + if is_datetime64_dtype(data.dtype): + return DatetimeProperties(DatetimeIndex(data, copy=False), + index, name=name, orig=orig) + elif is_datetime64tz_dtype(data.dtype): + return DatetimeProperties(DatetimeIndex(data, copy=False), + index, data.name, orig=orig) + elif is_timedelta64_dtype(data.dtype): + return TimedeltaProperties(TimedeltaIndex(data, copy=False), index, + name=name, orig=orig) + else: + if is_period_arraylike(data): + return PeriodProperties(PeriodIndex(data, copy=False), index, + name=name, orig=orig) + if is_datetime_arraylike(data): + return DatetimeProperties(DatetimeIndex(data, copy=False), + index, + name=name, orig=orig) + + raise TypeError("cannot convert an object of type {0} to a " + "datetimelike index".format(type(data))) CombinedDatetimelikeProperties.__doc__ = DatetimeProperties.__doc__ From 28865d745f2a17035cbe495a590affa6e5e29d7d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 15:33:07 -0600 Subject: [PATCH 21/30] Refactor again. 1. Removed optional caching 2. Refactored `Properties` to create the indexes it uses on demand 3. 
Moved accessor definitions to classes for clarity --- doc/source/internals.rst | 21 -------- pandas/__init__.py | 3 -- pandas/core/accessor.py | 46 ++++------------ pandas/core/frame.py | 2 + pandas/core/indexes/accessors.py | 75 ++++++++++++++++---------- pandas/core/indexes/base.py | 3 ++ pandas/core/register_accessors.py | 39 -------------- pandas/core/series.py | 13 ++++- pandas/tests/series/test_api.py | 2 +- pandas/tests/test_register_accessor.py | 16 ------ pandas/tests/test_strings.py | 5 +- 11 files changed, 78 insertions(+), 147 deletions(-) delete mode 100644 pandas/core/register_accessors.py diff --git a/doc/source/internals.rst b/doc/source/internals.rst index 62058941fc725..ee4df879d9478 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -89,27 +89,6 @@ not check (or care) whether the levels themselves are sorted. Fortunately, the constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but if you compute the levels and labels yourself, please be careful. -Accessors ---------- - -We use accessors (like ``Series.str``, ``CategoricalIndex.cat``, etc) to provide -namespaces for related methods on certain objects. - -Our accessors should inherit from ``pands.core.base.NoNewAttributesMixin`` and -call ``self._freeze`` at the end of initialization. Additionally, we can use -``pandas.core.accessor.PandasDelegate`` to implement many attributes on the -accessor that simply pass the call through to another object, (e.g. -``series.cat.categories`` delegates to ``Categorical.categories``). - -Accessors are registered using the public accessor registration methods - -* :func:`pandas.api.extensions.register_dataframe_accessor` -* :func:`pandas.api.extensions.register_series_accessor` -* :func:`pandas.api.extensions.register_index_accessor` - -These methods import ``Series``, ``DataFrame``, etc., so the registration -has to happen towards the end of ``pandas.__init__``. - .. 
_ref-subclassing-pandas: Subclassing pandas Data Structures diff --git a/pandas/__init__.py b/pandas/__init__.py index 6d2ce2fea033f..93c5b6484b840 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -46,9 +46,6 @@ from pandas.core.computation.api import * from pandas.core.reshape.api import * -# register accessors -from pandas.core.register_accessors import * - # deprecate tools.plotting, plot_params and scatter_matrix on the top namespace import pandas.tools.plotting plot_params = pandas.plotting._style._Options(deprecated=True) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 0e2da1064c4c1..6500ac405def0 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -110,7 +110,7 @@ def f(self, *args, **kwargs): # 2. We made caching configurable -class _CachableAccessor(object): +class _CachedAccssor(object): """Custom property-like object (descriptor) for caching accessors. Parameters @@ -121,16 +121,10 @@ class _CachableAccessor(object): The class with the extension methods. The class' __init__ method should expect one of a ``Series``, ``DataFrame`` or ``Index`` as the single argument ``data`` - cache : bool, default True - Whether to cache the accessor on an instance, such that ``df.foo`` - will be the same object every time. Set this to ``False`` if the - object you are extending is mutable (``Series``, ``DataFrame``) and - if your accessor caches anything based on its ``data`` argument. 
""" - def __init__(self, name, accessor, cache=True): + def __init__(self, name, accessor): self._name = name self._accessor = accessor - self._cache = cache def __get__(self, obj, cls): if obj is None: @@ -141,12 +135,11 @@ def __get__(self, obj, cls): # http://www.pydanny.com/cached-property.html # We need to use object.__setattr__ because we overwrite __setattr__ on # NDFrame - if self._cache: - object.__setattr__(obj, self._name, accessor_obj) + object.__setattr__(obj, self._name, accessor_obj) return accessor_obj -def _register_accessor(name, cls, cache=True): +def _register_accessor(name, cls): def decorator(accessor): if hasattr(cls, name): warnings.warn( @@ -155,12 +148,12 @@ def decorator(accessor): 'name.'.format(accessor, name, cls), AccessorRegistrationWarning, stacklevel=2) - setattr(cls, name, _CachableAccessor(name, accessor, cache=cache)) + setattr(cls, name, _CachedAccssor(name, accessor)) return accessor return decorator -def register_dataframe_accessor(name, cache=True): +def register_dataframe_accessor(name): """Register a custom accessor on pandas.DataFrame objects. Parameters @@ -168,11 +161,6 @@ def register_dataframe_accessor(name, cache=True): name : str Name under which the accessor should be registered. A warning is issued if this name conflicts with a preexisting attribute. - cache : bool, default True - Whether to cache the accessor such that ``df.`` is always the - same object for a given ``DataFrame``. Set this to ``False`` if the - object you are extending is mutable (``Series``, ``DataFrame``) and - if your accessor caches anything based on its ``data`` argument. Examples -------- @@ -211,10 +199,10 @@ def plot(self): register_series_accessor """ from pandas import DataFrame - return _register_accessor(name, DataFrame, cache=cache) + return _register_accessor(name, DataFrame) -def register_series_accessor(name, cache=True): +def register_series_accessor(name): """Register a custom accessor on pandas.Series objects. 
Parameters @@ -222,12 +210,6 @@ def register_series_accessor(name, cache=True): name : str Name under which the accessor should be registered. A warning is issued if this name conflicts with a preexisting attribute. - cache : bool, default True - Whether to cache the accessor such that ``series.`` is always the - same object for a given ``Series``. Set this to ``False`` if the - object you are extending is mutable (``Series``, ``DataFrame``) and - if your accessor caches anything based on its ``data`` argument. - See Also -------- @@ -235,10 +217,10 @@ def register_series_accessor(name, cache=True): register_index_accessor """ from pandas import Series - return _register_accessor(name, Series, cache=cache) + return _register_accessor(name, Series) -def register_index_accessor(name, cache=True): +def register_index_accessor(name): """Register a custom accessor on pandas.Index objects. Parameters @@ -246,12 +228,6 @@ def register_index_accessor(name, cache=True): name : str Name under which the accessor should be registered. A warning is issued if this name conflicts with a preexisting attribute. - cache : bool, default True - Whether to cache the accessor such that ``df.`` is always the - same object for a given ``DataFrame``. Set this to ``False`` if the - object you are extending is mutable (``Series``, ``DataFrame``) and - if your accessor caches anything based on its ``data`` argument. 
- See Also -------- @@ -259,4 +235,4 @@ def register_index_accessor(name, cache=True): register_series_accessor """ from pandas import Index - return _register_accessor(name, Index, cache=cache) + return _register_accessor(name, Index) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 49a01be5d25f7..ff861c71266e9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -23,6 +23,7 @@ import numpy as np import numpy.ma as ma +from pandas.core.accessor import _CachedAccssor from pandas.core.dtypes.cast import ( maybe_upcast, cast_scalar_to_array, @@ -6005,6 +6006,7 @@ def isin(self, values): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame + plot = _CachedAccssor("plot", gfx.FramePlotMethods) hist = gfx.hist_frame boxplot = gfx.boxplot_frame diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 8bc6595b3489d..2067c89b0c7e0 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -4,6 +4,7 @@ import numpy as np +from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import ( is_period_arraylike, is_datetime_arraylike, is_integer_dtype, @@ -22,17 +23,46 @@ class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): - def __init__(self, values, index, name, orig=None): - self.values = values - self.index = index - self.name = name - self.orig = orig + def __init__(self, data): + if not isinstance(data, ABCSeries): + raise TypeError("cannot convert an object of type {0} to a " + "datetimelike index".format(type(data))) + + orig = data if is_categorical_dtype(data) else None + if orig is not None: + data = orig.values.categories + + self._values = data + self._orig = orig + self._name = getattr(data, 'name', None) + self._index = getattr(data, 'index', None) self._freeze() + def _get_values(self): + data = self._values + if is_datetime64_dtype(data.dtype): + return DatetimeIndex(data, copy=False, 
name=self._name) + + elif is_datetime64tz_dtype(data.dtype): + return DatetimeIndex(data, copy=False, name=self._name) + + elif is_timedelta64_dtype(data.dtype): + return TimedeltaIndex(data, copy=False, name=self._name) + + else: + if is_period_arraylike(data): + return PeriodIndex(data, copy=False, name=self._name) + if is_datetime_arraylike(data): + return DatetimeIndex(data, copy=False, name=self._name) + + raise TypeError("cannot convert an object of type {0} to a " + "datetimelike index".format(type(data))) + def _delegate_property_get(self, name): from pandas import Series + values = self._get_values() - result = getattr(self.values, name) + result = getattr(values, name) # maybe need to upcast (ints) if isinstance(result, np.ndarray): @@ -44,11 +74,11 @@ def _delegate_property_get(self, name): result = np.asarray(result) # blow up if we operate on categories - if self.orig is not None: - result = take_1d(result, self.orig.cat.codes) + if self._orig is not None: + result = take_1d(result, self._orig.cat.codes) # return the result as a Series, which is by definition a copy - result = Series(result, index=self.index, name=self.name) + result = Series(result, index=self._index, name=self._name) # setting this object will show a SettingWithCopyWarning/Error result._is_copy = ("modifications to a property of a datetimelike " @@ -64,14 +94,15 @@ def _delegate_property_set(self, name, value, *args, **kwargs): def _delegate_method(self, name, *args, **kwargs): from pandas import Series + values = self._get_values() - method = getattr(self.values, name) + method = getattr(values, name) result = method(*args, **kwargs) if not is_list_like(result): return result - result = Series(result, index=self.index, name=self.name) + result = Series(result, index=self._index, name=self._name) # setting this object will show a SettingWithCopyWarning/Error result._is_copy = ("modifications to a method of a datetimelike " @@ -191,29 +222,17 @@ def __new__(cls, data): raise 
TypeError("cannot convert an object of type {0} to a " "datetimelike index".format(type(data))) - index = data.index - name = data.name - orig = data if is_categorical_dtype(data) else None - if orig is not None: - data = orig.values.categories - if is_datetime64_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=False), - index, name=name, orig=orig) + return DatetimeProperties(data) elif is_datetime64tz_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=False), - index, data.name, orig=orig) + return DatetimeProperties(data) elif is_timedelta64_dtype(data.dtype): - return TimedeltaProperties(TimedeltaIndex(data, copy=False), index, - name=name, orig=orig) + return TimedeltaProperties(data) else: if is_period_arraylike(data): - return PeriodProperties(PeriodIndex(data, copy=False), index, - name=name, orig=orig) + return PeriodProperties(data) if is_datetime_arraylike(data): - return DatetimeProperties(DatetimeIndex(data, copy=False), - index, - name=name, orig=orig) + return DatetimeProperties(data) raise TypeError("cannot convert an object of type {0} to a " "datetimelike index".format(type(data))) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b5ad24427621a..d45403e4b96de 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -12,6 +12,7 @@ from pandas.compat.numpy import function as nv from pandas import compat +from pandas.core.accessor import _CachedAccssor from pandas.core.dtypes.generic import ( ABCSeries, ABCMultiIndex, @@ -56,6 +57,7 @@ from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY from pandas.core.config import get_option +from pandas.core.strings import StringMethods # simplify @@ -171,6 +173,7 @@ class Index(IndexOpsMixin, PandasObject): _engine_type = libindex.ObjectEngine _accessors = frozenset(['str']) + str = _CachedAccssor("str", StringMethods) def __new__(cls, data=None, dtype=None, 
copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): diff --git a/pandas/core/register_accessors.py b/pandas/core/register_accessors.py deleted file mode 100644 index 498073db87fd9..0000000000000 --- a/pandas/core/register_accessors.py +++ /dev/null @@ -1,39 +0,0 @@ -from pandas.core import strings -from pandas.core.accessor import (register_dataframe_accessor, - register_index_accessor, - register_series_accessor) -import pandas.core.categorical -from pandas.core.indexes.accessors import CombinedDatetimelikeProperties -from pandas.plotting._core import SeriesPlotMethods, FramePlotMethods - - -@register_series_accessor("cat") -class CategoricalAccessor(pandas.core.categorical.CategoricalAccessor): - pass - - -# --- -# str -# --- - -@register_index_accessor("str") -@register_series_accessor("str") -class StringAccessor(strings.StringMethods): - pass - - -# -- -# dt -# -- - -register_series_accessor("dt", cache=False)(CombinedDatetimelikeProperties) - -# ---- -# plot -# ---- - -# TODO: see if this triggers the actual mpl import... 
-register_series_accessor("plot")(SeriesPlotMethods) -register_dataframe_accessor("plot")(FramePlotMethods) - -__all__ = [] diff --git a/pandas/core/series.py b/pandas/core/series.py index 13d7323cd739b..b6ec66c606c05 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,6 +13,7 @@ import numpy as np import numpy.ma as ma +from pandas.core.accessor import _CachedAccssor from pandas.core.dtypes.common import ( is_categorical_dtype, is_bool, @@ -52,7 +53,8 @@ from pandas.core.indexing import check_bool_indexer, maybe_convert_indices from pandas.core import generic, base from pandas.core.internals import SingleBlockManager -from pandas.core.categorical import Categorical +from pandas.core.categorical import Categorical, CategoricalAccessor +from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexes.period import PeriodIndex @@ -74,6 +76,7 @@ from pandas._libs import index as libindex, tslib as libts, lib, iNaT from pandas.core.config import get_option +from pandas.core.strings import StringMethods import pandas.plotting._core as gfx @@ -3056,6 +3059,14 @@ def to_period(self, freq=None, copy=True): return self._constructor(new_values, index=new_index).__finalize__(self) + # ---------------------------------------------------------------------- + # Accessor Methods + # ---------------------------------------------------------------------- + str = _CachedAccssor("str", StringMethods) + dt = _CachedAccssor("dt", CombinedDatetimelikeProperties) + cat = _CachedAccssor("cat", CategoricalAccessor) + plot = _CachedAccssor("plot", gfx.SeriesPlotMethods) + # ---------------------------------------------------------------------- # Add plotting methods to Series hist = gfx.hist_series diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 29bed3ecdd79a..73cc87855acbd 100644 --- 
a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -511,7 +511,7 @@ def test_cat_accessor(self): def test_cat_accessor_api(self): # GH 9322 - from pandas.core.register_accessors import CategoricalAccessor + from pandas.core.categorical import CategoricalAccessor assert Series.cat is CategoricalAccessor s = Series(list('aabbcde')).astype('category') diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py index bae36d90f1e8d..11da629a2b010 100644 --- a/pandas/tests/test_register_accessor.py +++ b/pandas/tests/test_register_accessor.py @@ -86,19 +86,3 @@ def __init__(self, data): with tm.assert_raises_regex(AttributeError, "whoops"): pd.Series([]).bad - - -@pytest.mark.parametrize('obj, registrar', [ - (pd.Series, pd.api.extensions.register_series_accessor), - (pd.DataFrame, pd.api.extensions.register_dataframe_accessor), - (pd.Index, pd.api.extensions.register_index_accessor) -]) -@pytest.mark.parametrize("cache", [True, False]) -def test_cache_works(obj, registrar, cache): - with ensure_removed(obj, "mine"): - registrar("mine", cache=cache)(MyAccessor) - - ser = obj([]) - a = ser.mine - b = ser.mine - assert (a is b) == cache diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index e0759eb32e383..8aa69bcbfdf7f 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -24,9 +24,8 @@ class TestStringMethods(object): def test_api(self): # GH 6106, GH 9322 - from pandas.core.register_accessors import StringAccessor - assert Series.str is StringAccessor - assert isinstance(Series(['']).str, StringAccessor) + assert Series.str is strings.StringMethods + assert isinstance(Series(['']).str, strings.StringMethods) # GH 9184 invalid = Series([1]) From 3bf4889b71df3e429ad88d7644021c5174cb7587 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 15:41:25 -0600 Subject: [PATCH 22/30] Fix API files --- pandas/api/extensions.py | 4 ---- 1 file changed, 4 
deletions(-) delete mode 100644 pandas/api/extensions.py diff --git a/pandas/api/extensions.py b/pandas/api/extensions.py deleted file mode 100644 index 64f5e8fb939a4..0000000000000 --- a/pandas/api/extensions.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Public API for extending panadas objects.""" -from pandas.core.accessor import (register_dataframe_accessor, # noqa - register_index_accessor, - register_series_accessor) From dea5d173a3406f09edfa1ebd9905f16d126bd0ac Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Jan 2018 15:41:50 -0600 Subject: [PATCH 23/30] Remove stale comment --- pandas/core/accessor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 6500ac405def0..ab00506ae97c6 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -107,7 +107,6 @@ def f(self, *args, **kwargs): # Ported with modifications from xarray # https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py # 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors -# 2. 
We made caching configurable class _CachedAccssor(object): From 9559f1237727f39d65ec0fc476d5d928abfa391f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 5 Jan 2018 06:03:14 -0600 Subject: [PATCH 24/30] Tests pass --- pandas/core/indexes/accessors.py | 81 +++++++++++++++++--------------- 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 2067c89b0c7e0..6abef1f2470a4 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -23,37 +23,33 @@ class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): - def __init__(self, data): + def __init__(self, data, orig): if not isinstance(data, ABCSeries): raise TypeError("cannot convert an object of type {0} to a " "datetimelike index".format(type(data))) - orig = data if is_categorical_dtype(data) else None - if orig is not None: - data = orig.values.categories - - self._values = data - self._orig = orig - self._name = getattr(data, 'name', None) - self._index = getattr(data, 'index', None) + self.values = data + self.orig = orig + self.name = getattr(data, 'name', None) + self.index = getattr(data, 'index', None) self._freeze() def _get_values(self): - data = self._values + data = self.values if is_datetime64_dtype(data.dtype): - return DatetimeIndex(data, copy=False, name=self._name) + return DatetimeIndex(data, copy=False, name=self.name) elif is_datetime64tz_dtype(data.dtype): - return DatetimeIndex(data, copy=False, name=self._name) + return DatetimeIndex(data, copy=False, name=self.name) elif is_timedelta64_dtype(data.dtype): - return TimedeltaIndex(data, copy=False, name=self._name) + return TimedeltaIndex(data, copy=False, name=self.name) else: if is_period_arraylike(data): - return PeriodIndex(data, copy=False, name=self._name) + return PeriodIndex(data, copy=False, name=self.name) if is_datetime_arraylike(data): - return DatetimeIndex(data, copy=False, name=self._name) + return 
DatetimeIndex(data, copy=False, name=self.name) raise TypeError("cannot convert an object of type {0} to a " "datetimelike index".format(type(data))) @@ -74,11 +70,11 @@ def _delegate_property_get(self, name): result = np.asarray(result) # blow up if we operate on categories - if self._orig is not None: - result = take_1d(result, self._orig.cat.codes) + if self.orig is not None: + result = take_1d(result, self.orig.cat.codes) # return the result as a Series, which is by definition a copy - result = Series(result, index=self._index, name=self._name) + result = Series(result, index=self.index, name=self.name) # setting this object will show a SettingWithCopyWarning/Error result._is_copy = ("modifications to a property of a datetimelike " @@ -102,7 +98,7 @@ def _delegate_method(self, name, *args, **kwargs): if not is_list_like(result): return result - result = Series(result, index=self._index, name=self._name) + result = Series(result, index=self.index, name=self.name) # setting this object will show a SettingWithCopyWarning/Error result._is_copy = ("modifications to a method of a datetimelike " @@ -127,11 +123,11 @@ class DatetimeProperties(Properties): """ def to_pydatetime(self): - return self.values.to_pydatetime() + return self._get_values().to_pydatetime() @property def freq(self): - return self.values.inferred_freq + return self._get_values().inferred_freq DatetimeProperties._add_delegate_accessors( @@ -158,7 +154,7 @@ class TimedeltaProperties(Properties): """ def to_pytimedelta(self): - return self.values.to_pytimedelta() + return self._get_values().to_pytimedelta() @property def components(self): @@ -171,11 +167,11 @@ def components(self): a DataFrame """ - return self.values.components.set_index(self.index) + return self._get_values().components.set_index(self.index) @property def freq(self): - return self.values.inferred_freq + return self._get_values().inferred_freq TimedeltaProperties._add_delegate_accessors( @@ -222,20 +218,29 @@ def __new__(cls, data): 
raise TypeError("cannot convert an object of type {0} to a " "datetimelike index".format(type(data))) - if is_datetime64_dtype(data.dtype): - return DatetimeProperties(data) - elif is_datetime64tz_dtype(data.dtype): - return DatetimeProperties(data) - elif is_timedelta64_dtype(data.dtype): - return TimedeltaProperties(data) - else: - if is_period_arraylike(data): - return PeriodProperties(data) - if is_datetime_arraylike(data): - return DatetimeProperties(data) - - raise TypeError("cannot convert an object of type {0} to a " - "datetimelike index".format(type(data))) + orig = data if is_categorical_dtype(data) else None + if orig is not None: + data = Series(orig.values.categories, + name=orig.name, + copy=False) + + try: + if is_datetime64_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_datetime64tz_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_timedelta64_dtype(data.dtype): + return TimedeltaProperties(data, orig) + else: + if is_period_arraylike(data): + return PeriodProperties(data, orig) + if is_datetime_arraylike(data): + return DatetimeProperties(data, orig) + except Exception: + pass # we raise an attribute error anyway + + raise AttributeError("Can only use .dt accessor with datetimelike " + "values") CombinedDatetimelikeProperties.__doc__ = DatetimeProperties.__doc__ From b00b0f856ee4d18298c82ea940fee37e49bd125c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 5 Jan 2018 06:09:09 -0600 Subject: [PATCH 25/30] DOC: some cleanup --- doc/source/developer.rst | 4 ++- doc/source/whatsnew/v0.23.0.txt | 57 ++++----------------------------- 2 files changed, 10 insertions(+), 51 deletions(-) diff --git a/doc/source/developer.rst b/doc/source/developer.rst index b9bf32ca99599..5c3b114ce7299 100644 --- a/doc/source/developer.rst +++ b/doc/source/developer.rst @@ -141,7 +141,7 @@ As an example of fully-formed metadata: ], 'pandas_version': '0.20.0'} -.. _register-accessors: +.. 
_developer.register-accessors: Registering Custom Accessors ---------------------------- @@ -181,3 +181,5 @@ Now users can access your methods using the `geo` namespace: # plots data on a map This can be a convenient way to extend pandas objects without subclassing them. +If you write a custom accessor, make a pull request adding it to our +:ref:`ecosystem` page. diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 43cf26eeb2c6f..967ade2312d75 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -119,56 +119,6 @@ Current Behavior s.rank(na_option='top') - -Extending Pandas Objects with New Accessors -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Pandas uses accessors to group together many related methods into a namespace on -``Series`` or ``Index`` objects. For example, ``Series.str`` for string methods, -or ``Series.dt`` for datetime methods. Inspired by xarray, pandas now officially -supports registering custom accessors in library code. - -Once you've implemented an accessor, register it with one or more of the decorators - -* :func:`pandas.api.extensions.register_dataframe_accessor` -* :func:`pandas.api.extensions.register_series_accessor` -* :func:`pandas.api.extensions.register_index_accessor` - -.. code-block:: python - - import pandas as pd - - @pd.extensions.register_dataframe_accessor("geo") - class GeoAccessor(object): - def __init__(self, pandas_obj): - self._obj = pandas_obj - - @property - def center(self): - # return the geographic center point of this DataFarme - lon = self._obj.latitude - lat = self._obj.longitude - return (float(lon.mean()), float(lat.mean())) - - def plot(self): - # plot this array's data on a map, e.g., using Cartopy - pass - -Back in an interactive IPython session: - -.. code-block:: python - - >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), - ... 
'latitude': np.linspace(0, 20)}) - >>> ds.geo.center - (5.0, 10.0) - >>> ds.geo.plot() - # plots data on a map - -This provides a convenient alternative to subclassing or composition. -If you write a custom accessor, make a pull request adding it to our -:ref:`ecosystem` page. - .. _whatsnew_0230.enhancements.other: Other Enhancements @@ -195,6 +145,13 @@ Other Enhancements - ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method. Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). - :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). +- Added :func:`pandas.api.extensions.register_dataframe_accessor`, + :func:`pandas.api.extensions.register_series_accessor`, and + :func:`pandas.api.extensions.register_index_accessor` for libraries downstream of pandas + to register custom accessors like ``.cat`` on pandas objects. See + :ref:`Registering Custom Accessors <developer.register-accessors>` for more (:issue:`14781`). + + .. 
_whatsnew_0230.api_breaking: From f03777fdeb8bd52ea9504344d088222d20ab73f0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 5 Jan 2018 07:28:31 -0600 Subject: [PATCH 26/30] No need to assign doc --- pandas/core/indexes/accessors.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 6abef1f2470a4..cb317386264de 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -241,6 +241,3 @@ def __new__(cls, data): raise AttributeError("Can only use .dt accessor with datetimelike " "values") - - -CombinedDatetimelikeProperties.__doc__ = DatetimeProperties.__doc__ From 1bedf9fa92fba2474a824c9e74afe9926b534d4e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 5 Jan 2018 08:28:43 -0600 Subject: [PATCH 27/30] Rename, shared docs --- pandas/core/accessor.py | 133 ++++++++++++++++++------------------ pandas/core/frame.py | 4 +- pandas/core/indexes/base.py | 4 +- pandas/core/series.py | 10 +-- 4 files changed, 77 insertions(+), 74 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index ab00506ae97c6..e59fb4d759150 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -8,6 +8,7 @@ import warnings from pandas.errors import AccessorRegistrationWarning +from pandas.util._decorators import Appender class DirNamesMixin(object): @@ -109,7 +110,7 @@ def f(self, *args, **kwargs): # 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors -class _CachedAccssor(object): +class CachedAccessor(object): """Custom property-like object (descriptor) for caching accessors. 
Parameters @@ -147,91 +148,93 @@ def decorator(accessor): 'name.'.format(accessor, name, cls), AccessorRegistrationWarning, stacklevel=2) - setattr(cls, name, _CachedAccssor(name, accessor)) + setattr(cls, name, CachedAccessor(name, accessor)) return accessor return decorator -def register_dataframe_accessor(name): - """Register a custom accessor on pandas.DataFrame objects. +_doc = """Register a custom accessor on %(klass)s objects. - Parameters - ---------- - name : str - Name under which the accessor should be registered. A warning is issued - if this name conflicts with a preexisting attribute. +Parameters +---------- +name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. - Examples - -------- +Notes +----- +When accessed, your accessor will be initialized with the pandas object +the user is interacting with. So the signature must be - In your library code:: +.. code-block:: python - import pandas as pd + def __init__(self, pandas_object): - @pd.extensions.register_dataframe_accessor("geo") - class GeoAccessor(object): - def __init__(self, pandas_obj): - self._obj = pandas_obj +For consistency with pandas methods, you should raise an ``AttributeError`` +if the data passed to your accessor has an incorrect dtype. - @property - def center(self): - # return the geographic center point of this DataFarme - lon = self._obj.latitude - lat = self._obj.longitude - return (float(lon.mean()), float(lat.mean())) +>>> pd.Series(['a', 'b']).dt +Traceback (most recent call last): +... +AttributeError: Can only use .dt accessor with datetimelike values - def plot(self): - # plot this array's data on a map, e.g., using Cartopy - pass +Examples +-------- - Back in an interactive IPython session: - >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), - ... 
'latitude': np.linspace(0, 20)}) - >>> ds.geo.center - (5.0, 10.0) - >>> ds.geo.plot() - # plots data on a map - - See also - -------- - register_index_accessor - register_series_accessor - """ +In your library code:: + + import pandas as pd + + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lat = self._obj.latitude + lon = self._obj.longitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Back in an interactive IPython session: + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +See also +-------- +%(others)s +""" + + +@Appender(_doc % dict(klass="DataFrame", + others=("register_series_accessor, " + "register_index_accessor"))) +def register_dataframe_accessor(name): from pandas import DataFrame return _register_accessor(name, DataFrame) +@Appender(_doc % dict(klass="Series", + others=("register_dataframe_accessor, " + "register_index_accessor"))) def register_series_accessor(name): - """Register a custom accessor on pandas.Series objects. - - Parameters - ---------- - name : str - Name under which the accessor should be registered. A warning is issued - if this name conflicts with a preexisting attribute. - - See Also - -------- - register_dataframe_accessor - register_index_accessor - """ from pandas import Series return _register_accessor(name, Series) +@Appender(_doc % dict(klass="Index", + others=("register_dataframe_accessor, " + "register_series_accessor"))) def register_index_accessor(name): - """Register a custom accessor on pandas.Index objects. - - Parameters - ---------- - name : str - Name under which the accessor should be registered. 
A warning is issued - if this name conflicts with a preexisting attribute. - - See Also - -------- - register_dataframe_accessor - register_series_accessor - """ from pandas import Index return _register_accessor(name, Index) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ff861c71266e9..a095f8764f3bf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -23,7 +23,7 @@ import numpy as np import numpy.ma as ma -from pandas.core.accessor import _CachedAccssor +from pandas.core.accessor import CachedAccessor from pandas.core.dtypes.cast import ( maybe_upcast, cast_scalar_to_array, @@ -6006,7 +6006,7 @@ def isin(self, values): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame - plot = _CachedAccssor("plot", gfx.FramePlotMethods) + plot = CachedAccessor("plot", gfx.FramePlotMethods) hist = gfx.hist_frame boxplot = gfx.boxplot_frame diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d45403e4b96de..c3561a75ef042 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -12,7 +12,7 @@ from pandas.compat.numpy import function as nv from pandas import compat -from pandas.core.accessor import _CachedAccssor +from pandas.core.accessor import CachedAccessor from pandas.core.dtypes.generic import ( ABCSeries, ABCMultiIndex, @@ -173,7 +173,7 @@ class Index(IndexOpsMixin, PandasObject): _engine_type = libindex.ObjectEngine _accessors = frozenset(['str']) - str = _CachedAccssor("str", StringMethods) + str = CachedAccessor("str", StringMethods) def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): diff --git a/pandas/core/series.py b/pandas/core/series.py index b6ec66c606c05..191c59c0d9eae 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,7 +13,7 @@ import numpy as np import numpy.ma as ma -from pandas.core.accessor import _CachedAccssor +from pandas.core.accessor import 
CachedAccessor from pandas.core.dtypes.common import ( is_categorical_dtype, is_bool, @@ -3062,10 +3062,10 @@ def to_period(self, freq=None, copy=True): # ---------------------------------------------------------------------- # Accessor Methods # ---------------------------------------------------------------------- - str = _CachedAccssor("str", StringMethods) - dt = _CachedAccssor("dt", CombinedDatetimelikeProperties) - cat = _CachedAccssor("cat", CategoricalAccessor) - plot = _CachedAccssor("plot", gfx.SeriesPlotMethods) + str = CachedAccessor("str", StringMethods) + dt = CachedAccessor("dt", CombinedDatetimelikeProperties) + cat = CachedAccessor("cat", CategoricalAccessor) + plot = CachedAccessor("plot", gfx.SeriesPlotMethods) # ---------------------------------------------------------------------- # Add plotting methods to Series From 018facd6575746aeae888115efee547b2889e860 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 5 Jan 2018 08:49:46 -0600 Subject: [PATCH 28/30] Doc __new__ --- pandas/core/indexes/accessors.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index cb317386264de..d40230386216c 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -212,6 +212,10 @@ class PeriodProperties(Properties): class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): def __new__(cls, data): + # CombinedDatetimelikeProperties isn't really instantiated. Instead + # we need to choose which parent (datetime or timedelta) is + # appropriate. Since we're checking the dtypes anyway, we'll just + # do all the validation here. 
from pandas import Series if not isinstance(data, Series): From a308a2e29286c03c71e3566a881629d996b4821c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Jan 2018 07:19:03 -0600 Subject: [PATCH 29/30] Use UserWarning --- pandas/core/accessor.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index e59fb4d759150..96bf628c8d7ff 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -7,7 +7,6 @@ """ import warnings -from pandas.errors import AccessorRegistrationWarning from pandas.util._decorators import Appender @@ -108,7 +107,7 @@ def f(self, *args, **kwargs): # Ported with modifications from xarray # https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py # 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors - +# 2. We use a UserWarning instead of a custom Warning class CachedAccessor(object): """Custom property-like object (descriptor) for caching accessors. 
@@ -146,7 +145,7 @@ def decorator(accessor): 'registration of accessor {!r} under name {!r} for type ' '{!r} is overriding a preexisting attribute with the same ' 'name.'.format(accessor, name, cls), - AccessorRegistrationWarning, + UserWarning, stacklevel=2) setattr(cls, name, CachedAccessor(name, accessor)) return accessor From 66e2207a91ee97aba943bebb6da6021a7f58d93f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Jan 2018 07:20:31 -0600 Subject: [PATCH 30/30] Update test --- pandas/tests/test_register_accessor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py index 11da629a2b010..fe0cf4c9b38af 100644 --- a/pandas/tests/test_register_accessor.py +++ b/pandas/tests/test_register_accessor.py @@ -4,7 +4,6 @@ import pandas as pd import pandas.util.testing as tm -from pandas.errors import AccessorRegistrationWarning @contextlib.contextmanager @@ -63,7 +62,7 @@ def test_overwrite_warns(): # Need to restore mean mean = pd.Series.mean try: - with tm.assert_produces_warning(AccessorRegistrationWarning) as w: + with tm.assert_produces_warning(UserWarning) as w: pd.api.extensions.register_series_accessor('mean')(MyAccessor) s = pd.Series([1, 2]) assert s.mean.prop == 'item'