diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 608e2c8e72ded..7f0cfbbd2414a 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -717,11 +717,9 @@ and allows efficient indexing and storage of an index with a large number of dup .. ipython:: python - from pandas.api.types import CategoricalDtype - df = pd.DataFrame({'A': np.arange(6), 'B': list('aabbca')}) - df['B'] = df['B'].astype(CategoricalDtype(list('cab'))) + df['B'] = df['B'].astype(pd.CategoricalDtype(list('cab'))) df df.dtypes df.B.cat.categories @@ -747,7 +745,7 @@ The ``CategoricalIndex`` is **preserved** after indexing: df2.loc['a'].index Sorting the index will sort by the order of the categories (recall that we -created the index with ``CategoricalDtype(list('cab'))``, so the sorted +created the index with ``pd.CategoricalDtype(list('cab'))``, so the sorted order is ``cab``). .. ipython:: python diff --git a/doc/source/api.rst b/doc/source/api.rst index 665649aead33c..60f3f078e2e75 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -702,93 +702,6 @@ strings and apply several methods to it. These can be accessed like Series.dt Index.str -.. _api.categorical: - -Categorical -~~~~~~~~~~~ - -Pandas defines a custom data type for representing data that can take only a -limited, fixed set of values. The dtype of a ``Categorical`` can be described by -a :class:`pandas.api.types.CategoricalDtype`. - -.. autosummary:: - :toctree: generated/ - :template: autosummary/class_without_autosummary.rst - - api.types.CategoricalDtype - -.. autosummary:: - :toctree: generated/ - - api.types.CategoricalDtype.categories - api.types.CategoricalDtype.ordered - -Categorical data can be stored in a :class:`pandas.Categorical` - -.. autosummary:: - :toctree: generated/ - :template: autosummary/class_without_autosummary.rst - - Categorical - - -The alternative :meth:`Categorical.from_codes` constructor can be used when you -have the categories and integer codes already: - -.. 
autosummary:: - :toctree: generated/ - - Categorical.from_codes - -The dtype information is available on the ``Categorical`` - -.. autosummary:: - :toctree: generated/ - - Categorical.dtype - Categorical.categories - Categorical.ordered - Categorical.codes - -``np.asarray(categorical)`` works by implementing the array interface. Be aware, that this converts -the Categorical back to a NumPy array, so categories and order information is not preserved! - -.. autosummary:: - :toctree: generated/ - - Categorical.__array__ - -A ``Categorical`` can be stored in a ``Series`` or ``DataFrame``. -To create a Series of dtype ``category``, use ``cat = s.astype(dtype)`` or -``Series(..., dtype=dtype)`` where ``dtype`` is either - -* the string ``'category'`` -* an instance of :class:`~pandas.api.types.CategoricalDtype`. - -If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical -data. This accessor is similar to the ``Series.dt`` or ``Series.str`` and has the -following usable methods and properties: - -.. autosummary:: - :toctree: generated/ - :template: autosummary/accessor_attribute.rst - - Series.cat.categories - Series.cat.ordered - Series.cat.codes - -.. autosummary:: - :toctree: generated/ - :template: autosummary/accessor_method.rst - - Series.cat.rename_categories - Series.cat.reorder_categories - Series.cat.add_categories - Series.cat.remove_categories - Series.cat.remove_unused_categories - Series.cat.set_categories - Series.cat.as_ordered - Series.cat.as_unordered Plotting ~~~~~~~~ @@ -842,29 +755,6 @@ Serialization / IO / Conversion Series.to_clipboard Series.to_latex -Sparse -~~~~~~ -.. autosummary:: - :toctree: generated/ - - SparseSeries.to_coo - SparseSeries.from_coo - -.. autosummary:: - :toctree: generated/ - :template: autosummary/accessor_attribute.rst - - Series.sparse.npoints - Series.sparse.density - Series.sparse.fill_value - Series.sparse.sp_values - - -.. 
autosummary:: - :toctree: generated/ - - Series.sparse.from_coo - Series.sparse.to_coo .. _api.dataframe: @@ -1675,7 +1565,7 @@ IntervalIndex Components IntervalIndex.get_indexer IntervalIndex.set_closed IntervalIndex.overlaps - + IntervalIndex.to_tuples .. _api.multiindex: @@ -2567,6 +2457,256 @@ Exceptions and warnings errors.UnsortedIndexError errors.UnsupportedFunctionCall +.. _api.extension: + +Extension Types +--------------- + +Pandas implements several :class:`api.extensions.ExtensionArray` types. +These arrays may be placed in Series or a column of a DataFrame. Some +may also have a specialized index type. + +.. _api.categorical: + +Categorical +~~~~~~~~~~~ + +Pandas defines a custom data type for representing data that can take only a +limited, fixed set of values. The dtype of a ``Categorical`` can be described by +a :class:`CategoricalDtype`. +See :ref:`categorical` for more on working with Categorical data. + +.. autosummary:: + :toctree: generated/ + :template: autosummary/class_without_autosummary.rst + + CategoricalDtype + +.. autosummary:: + :toctree: generated/ + + CategoricalDtype.categories + CategoricalDtype.ordered + +Categorical data can be stored in a :class:`pandas.Categorical` + +.. autosummary:: + :toctree: generated/ + :template: autosummary/class_without_autosummary.rst + + Categorical + + +The alternative :meth:`Categorical.from_codes` constructor can be used when you +have the categories and integer codes already: + +.. autosummary:: + :toctree: generated/ + + Categorical.from_codes + +The dtype of a ``Categorical`` is a :class:`CategoricalDtype`. The dtype +stores the actual categories, and whether the categories are ordered. + +.. autosummary:: + :toctree: generated/ + + CategoricalDtype + +The dtype information is also available directly on the ``Categorical``. + +.. 
autosummary:: + :toctree: generated/ + + Categorical.dtype + Categorical.categories + Categorical.ordered + Categorical.codes + +``np.asarray(categorical)`` works by implementing the array interface. Be aware, that this converts +the Categorical back to a NumPy array, so categories and order information is not preserved! + +.. autosummary:: + :toctree: generated/ + + Categorical.__array__ + +A ``Categorical`` can be stored in a ``Series`` or ``DataFrame``. +To create a Series of dtype ``category``, use ``cat = s.astype(dtype)`` or +``Series(..., dtype=dtype)`` where ``dtype`` is either + +* the string ``'category'`` +* an instance of :class:`CategoricalDtype`. + +If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical +data. This accessor is similar to the ``Series.dt`` or ``Series.str`` and has the +following usable methods and properties: + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_attribute.rst + + Series.cat.categories + Series.cat.ordered + Series.cat.codes + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + + Series.cat.rename_categories + Series.cat.reorder_categories + Series.cat.add_categories + Series.cat.remove_categories + Series.cat.remove_unused_categories + Series.cat.set_categories + Series.cat.as_ordered + Series.cat.as_unordered + +.. _api.integer: + +Integer-NA +~~~~~~~~~~ + +:class:`api.extensions.IntegerArray` can be used to store integer-value data +that may contain missing values. This works around the consequence of using +:attr:`numpy.nan`, a floating point value, as a missing value sentinel. +See :ref:`integer_na` for more. + +.. autosummary:: + :toctree: generated/ + + integer_array + +.. autosummary:: + :toctree: generated/ + + api.extensions.IntegerArray + +.. _api.period: + +Period +~~~~~~ + +:class:`api.extensions.PeriodArray` is an array for data representing a time span. +The scalar type is a :class:`Period`. 
These may be stored in a :class:`Series` +or as a :class:`PeriodIndex`. :func:`period_array` should be used to create a +new :class:`api.extensions.PeriodArray`. + +.. autosummary:: + :toctree: generated/ + + period_array + +.. autosummary:: + :toctree: generated/ + :template: autosummary/class_without_autosummary.rst + + api.extensions.PeriodArray + +.. autosummary:: + :toctree: generated/ + + api.extensions.PeriodArray.day + api.extensions.PeriodArray.dayofweek + api.extensions.PeriodArray.dayofyear + api.extensions.PeriodArray.days_in_month + api.extensions.PeriodArray.daysinmonth + api.extensions.PeriodArray.end_time + api.extensions.PeriodArray.freq + api.extensions.PeriodArray.freqstr + api.extensions.PeriodArray.hour + api.extensions.PeriodArray.is_leap_year + api.extensions.PeriodArray.minute + api.extensions.PeriodArray.month + api.extensions.PeriodArray.quarter + api.extensions.PeriodArray.qyear + api.extensions.PeriodArray.second + api.extensions.PeriodArray.start_time + api.extensions.PeriodArray.week + api.extensions.PeriodArray.weekday + api.extensions.PeriodArray.weekofyear + api.extensions.PeriodArray.year + api.extensions.PeriodArray.asfreq + api.extensions.PeriodArray.shift + api.extensions.PeriodArray.strftime + api.extensions.PeriodArray.to_timestamp + +.. _api.interval: + +Interval +~~~~~~~~ + +:class:`IntervalArray` is an array for storing data representing intervals. +The scalar type is a :class:`Interval`. These may be stored in a :class:`Series` +or as a :class:`IntervalIndex`. The :class:`IntervalArray` can be closed on the +left or right sides, or both or neither sides. + +.. currentmodule:: pandas + +.. autosummary:: + + :toctree: generated/ + :template: autosummary/class_without_autosummary.rst + + IntervalArray + +.. 
autosummary:: + :toctree: generated/ + + IntervalArray.from_arrays + IntervalArray.from_tuples + IntervalArray.from_breaks + IntervalArray.contains + IntervalArray.left + IntervalArray.right + IntervalArray.mid + IntervalArray.closed + IntervalArray.length + IntervalArray.values + IntervalArray.is_non_overlapping_monotonic + IntervalArray.set_closed + IntervalArray.overlaps + IntervalArray.to_tuples + + +.. _api.sparse: + +Sparse +~~~~~~ + +:class:`SparseArray` is an array for efficiently storing data with a commonly +repeated ``fill_value``. See :ref:`sparse` for more. + +.. autosummary:: + :toctree: generated/ + + SparseArray + + +A ``.sparse`` accessor is available on Series with Sparse data. + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_attribute.rst + + Series.sparse.npoints + Series.sparse.density + Series.sparse.fill_value + Series.sparse.sp_values + +.. autosummary:: + :toctree: generated/ + + Series.sparse.from_coo + Series.sparse.to_coo + +.. autosummary:: + :toctree: generated/ + + SparseSeries.to_coo + SparseSeries.from_coo + Data types related functionality ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2684,6 +2824,29 @@ objects. 
generated/pandas.Series.ix generated/pandas.Series.imag generated/pandas.Series.real + generated/pandas.IntervalArray.argsort + generated/pandas.IntervalArray.astype + generated/pandas.IntervalArray.can_hold_na + generated/pandas.IntervalArray.closed_left + generated/pandas.IntervalArray.closed_right + generated/pandas.IntervalArray.copy + generated/pandas.IntervalArray.dropna + generated/pandas.IntervalArray.dtype + generated/pandas.IntervalArray.factorize + generated/pandas.IntervalArray.fillna + generated/pandas.IntervalArray.isna + generated/pandas.IntervalArray.nbytes + generated/pandas.IntervalArray.ndim + generated/pandas.IntervalArray.open_left + generated/pandas.IntervalArray.open_right + generated/pandas.IntervalArray.repeat + generated/pandas.IntervalArray.shape + generated/pandas.IntervalArray.shift + generated/pandas.IntervalArray.size + generated/pandas.IntervalArray.take + generated/pandas.IntervalArray.unique + generated/pandas.IntervalArray.value_counts + generated/pandas.SparseArray.nonzero .. Can't convince sphinx to generate toctree for this class attribute. diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index acab9de905540..9d7bdc0c9e87f 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -150,11 +150,11 @@ behavior: 2. Categories are unordered. To control those behaviors, instead of passing ``'category'``, use an instance -of :class:`~pandas.api.types.CategoricalDtype`. +of :class:`CategoricalDtype`. .. ipython:: python - from pandas.api.types import CategoricalDtype + from pandas import CategoricalDtype s = pd.Series(["a", "b", "c", "a"]) cat_type = CategoricalDtype(categories=["b", "c", "d"], @@ -227,7 +227,7 @@ A categorical's type is fully described by 1. ``categories``: a sequence of unique values and no missing values 2. ``ordered``: a boolean -This information can be stored in a :class:`~pandas.api.types.CategoricalDtype`. +This information can be stored in a :class:`CategoricalDtype`. 
The ``categories`` argument is optional, which implies that the actual categories should be inferred from whatever is present in the data when the :class:`pandas.Categorical` is created. The categories are assumed to be unordered @@ -235,20 +235,20 @@ by default. .. ipython:: python - from pandas.api.types import CategoricalDtype + from pandas import CategoricalDtype CategoricalDtype(['a', 'b', 'c']) CategoricalDtype(['a', 'b', 'c'], ordered=True) CategoricalDtype() -A :class:`~pandas.api.types.CategoricalDtype` can be used in any place pandas +A :class:`CategoricalDtype` can be used in any place pandas expects a `dtype`. For example :func:`pandas.read_csv`, :func:`pandas.DataFrame.astype`, or in the ``Series`` constructor. .. note:: As a convenience, you can use the string ``'category'`` in place of a - :class:`~pandas.api.types.CategoricalDtype` when you want the default behavior of + :class:`CategoricalDtype` when you want the default behavior of the categories being unordered, and equal to the set values present in the array. In other words, ``dtype='category'`` is equivalent to ``dtype=CategoricalDtype()``. @@ -256,7 +256,7 @@ expects a `dtype`. For example :func:`pandas.read_csv`, Equality Semantics ~~~~~~~~~~~~~~~~~~ -Two instances of :class:`~pandas.api.types.CategoricalDtype` compare equal +Two instances of :class:`CategoricalDtype` compare equal whenever they have the same categories and order. When comparing two unordered categoricals, the order of the ``categories`` is not considered. @@ -834,7 +834,7 @@ Unioning .. versionadded:: 0.19.0 If you want to combine categoricals that do not necessarily have the same -categories, the :func:`~pandas.api.types.union_categoricals` function will +categories, the :func:`union_categoricals` function will combine a list-like of categoricals. The new categories will be the union of the categories being combined. @@ -884,7 +884,7 @@ using the ``ignore_ordered=True`` argument. 
b = pd.Categorical(["c", "b", "a"], ordered=True) union_categoricals([a, b], ignore_order=True) -:func:`~pandas.api.types.union_categoricals` also works with a +:func:`union_categoricals` also works with a ``CategoricalIndex``, or ``Series`` containing categorical data, but note that the resulting array will always be a plain ``Categorical``: diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index d02912294060c..93442cf4a6632 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -1047,3 +1047,31 @@ Alternatively, one can convert to an xarray ``DataArray``. p.to_xarray() You can see the full-documentation for the `xarray package `__. + +.. _dsintro.data_type: + +Data Types +---------- + +Every :class:`Index`, :class:`Series`, and column of a :class:`DataFrame` has a data type. +The data type or types are available with :meth:`Index.dtype`, :meth:`Series.dtype` and +:meth:`DataFrame.dtypes` (one dtype per column). + +For the most part, pandas uses NumPy arrays and dtypes. Pandas has made a few extensions +to NumPy for types that are especially important for tabular data analysis. + +===================================== ============================== +Array Type Documentation +===================================== ============================== +:class:`Categorical` :ref:`categorical` +:class:`DatetimeArray` TODO +:class:`api.extensions.IntegerArray` :ref:`integer_na` +:class:`IntervalArray` :ref:`indexing.intervallindex` +:class:`api.extensions.PeriodArray` :ref:`timeseries.periods` +:class:`SparseArray` :ref:`sparse` +===================================== ============================== + +If you need to convert one of these arrays to a NumPy array, use :meth:`numpy.asarray`. +This will necessarily lose the dtype information (since NumPy can't represent these +types) but will preserve equality (values that were equal in pandas extension +array will still be equal in the NumPy array). 
diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index d2b88e794e51e..07cd992d74e18 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -139,6 +139,7 @@ See the package overview for more detail about what's in the library. timeseries timedeltas categorical + integer_na visualization style io diff --git a/doc/source/integer_na.rst b/doc/source/integer_na.rst new file mode 100644 index 0000000000000..029e48a77092d --- /dev/null +++ b/doc/source/integer_na.rst @@ -0,0 +1,73 @@ +.. currentmodule:: pandas + +.. ipython:: python + :suppress: + + import numpy as np + import pandas as pd + +.. _integer_na: + +******************************** +Integer Data with Missing Values +******************************** + +.. versionadded:: 0.24.0 + +In :ref:`missing_data`, we say that pandas primarily uses ``NaN`` to represent +missing data. The most unfortunate consequence of this is that, because +``NaN`` is a float, an array of integers with missing values will have float +dtype. + +Pandas can represent integer data with missing values with an +:class:`api.extensions.IntegerArray`. This is an :ref:`extension type <extending.extension-types>` +implemented within pandas. It is not the default dtype and will not be inferred; +you must explicitly create an :class:`api.extensions.IntegerArray` using :func:`integer_array`. + +.. ipython:: python + + arr = pd.integer_array([1, 2, np.nan]) + arr + +This array can be stored in a :class:`DataFrame` or :class:`Series` like any +NumPy array. + +.. ipython:: python + + pd.Series(arr) + +Alternatively, you can instruct pandas to treat an array-like as an +:class:`api.extensions.IntegerArray` by specifying a dtype with a capital "I". + +.. ipython:: python + + s = pd.Series([1, 2, np.nan], dtype="Int64") + s + +Operations involving an integer array will behave similar to NumPy arrays. +Missing values will be propagated, and the data will be coerced to another +dtype if needed. + +.. 
ipython:: python + + # arithmetic + s + 1 + + # comparison + s == 1 + + # indexing + s.iloc[1:3] + + # operate with other dtypes + s + s.iloc[1:3].astype('Int8') + + # coerce when needed + s + 0.01 + +Reduction and groupby operations such as 'sum' work as well. + +.. ipython:: python + + df.sum() + df.groupby('B').A.sum() diff --git a/doc/source/io.rst b/doc/source/io.rst index 68faefa872c88..acd726e0f6cc4 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -451,7 +451,7 @@ Specifying Categorical dtype .. versionadded:: 0.19.0 ``Categorical`` columns can be parsed directly by specifying ``dtype='category'`` or -``dtype=CategoricalDtype(categories, ordered)``. +``dtype=pd.CategoricalDtype(categories, ordered)``. .. ipython:: python @@ -473,14 +473,12 @@ specification: Specifying ``dtype='cateogry'`` will result in an unordered ``Categorical`` whose ``categories`` are the unique values observed in the data. For more control on the categories and order, create a -:class:`~pandas.api.types.CategoricalDtype` ahead of time, and pass that for +:class:`CategoricalDtype` ahead of time, and pass that for that column's ``dtype``. .. ipython:: python - from pandas.api.types import CategoricalDtype - - dtype = CategoricalDtype(['d', 'c', 'b', 'a'], ordered=True) + dtype = pd.CategoricalDtype(['d', 'c', 'b', 'a'], ordered=True) pd.read_csv(StringIO(data), dtype={'col1': dtype}).dtypes When using ``dtype=CategoricalDtype``, "unexpected" values outside of @@ -488,7 +486,7 @@ When using ``dtype=CategoricalDtype``, "unexpected" values outside of .. ipython:: python - dtype = CategoricalDtype(['a', 'b', 'd']) # No 'c' + dtype = pd.CategoricalDtype(['a', 'b', 'd']) # No 'c' pd.read_csv(StringIO(data), dtype={'col1': dtype}).col1 This matches the behavior of :meth:`Categorical.set_categories`. 
diff --git a/doc/source/merging.rst b/doc/source/merging.rst index 98914c13d4d31..477f533db2383 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -858,10 +858,8 @@ The left frame. .. ipython:: python - from pandas.api.types import CategoricalDtype - X = pd.Series(np.random.choice(['foo', 'bar'], size=(10,))) - X = X.astype(CategoricalDtype(categories=['foo', 'bar'])) + X = X.astype(pd.CategoricalDtype(categories=['foo', 'bar'])) left = pd.DataFrame({'X': X, 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) @@ -874,7 +872,7 @@ The right frame. right = pd.DataFrame({ 'X': pd.Series(['foo', 'bar'], - dtype=CategoricalDtype(['foo', 'bar'])), + dtype=pd.CategoricalDtype(['foo', 'bar'])), 'Z': [1, 2] }) right diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index a52c80106f100..a34c069039a13 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -88,14 +88,14 @@ pandas captures 4 general time related concepts: #. Time spans: A span of time defined by a point in time and its associated frequency. #. Date offsets: A relative time duration that respects calendar arithmetic. Similar to ``dateutil.relativedelta.relativedelta`` from the ``dateutil`` package. 
-===================== ================= =================== ============================================ ======================================== +===================== ================= =================== ============================================ ================================================ Concept Scalar Class Array Class pandas Data Type Primary Creation Method -===================== ================= =================== ============================================ ======================================== +===================== ================= =================== ============================================ ================================================ Date times ``Timestamp`` ``DatetimeIndex`` ``datetime64[ns]`` or ``datetime64[ns, tz]`` ``to_datetime`` or ``date_range`` Time deltas ``Timedelta`` ``TimedeltaIndex`` ``timedelta64[ns]`` ``to_timedelta`` or ``timedelta_range`` -Time spans ``Period`` ``PeriodIndex`` ``period[freq]`` ``Period`` or ``period_range`` +Time spans ``Period`` ``PeriodIndex`` ``period[freq]`` ``Period``, ``period_range`` or ``period_array`` Date offsets ``DateOffset`` ``None`` ``None`` ``DateOffset`` -===================== ================= =================== ============================================ ======================================== +===================== ================= =================== ============================================ ================================================ For time series data, it's conventional to represent the time component in the index of a :class:`Series` or :class:`DataFrame` so manipulations can be performed with respect to the time element. diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 33d45f8d4444d..f8dd1a92b4ed3 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -22,6 +22,31 @@ dataframe's indexes from the resulting Parquet file. 
(:issue:`20768`) - :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`) +.. _whatsnew_0240.enhancements.extension_api: + +Pandas Extension Types +^^^^^^^^^^^^^^^^^^^^^^ + +All of pandas' internal extension types (datetime with timezone, categorical, +period, interval, sparse) now follow the public Extension Array API. The +following methods and classes have been added to the public API for working with +these types. + +**dtypes** + +- :class:`CategoricalDtype` + +**Array Constructors** + +- :func:`period_array` +- :func:`integer_array` + +**Array Classes** + +- :class:`IntervalArray` +- :class:`api.extensions.PeriodArray` +- :class:`api.extensions.IntegerArray` + .. _whatsnew_0240.enhancements.extension_array_operators: ``ExtensionArray`` operator support @@ -99,7 +124,9 @@ Reduction and groupby operations such as 'sum' work. .. warning:: - The Integer NA support currently uses the captilized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date. + The Integer NA support currently uses the capitalized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date. + +See :ref:`integer_na` for more. .. 
_whatsnew_0240.enhancements.read_html: diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py index 51555c57b2288..05afe41a1fb83 100644 --- a/pandas/api/extensions/__init__.py +++ b/pandas/api/extensions/__init__.py @@ -1,10 +1,14 @@ """Public API for extending panadas objects.""" -from pandas.core.accessor import (register_dataframe_accessor, # noqa +from pandas.core.accessor import (register_dataframe_accessor, # noqa: F401 register_index_accessor, register_series_accessor) -from pandas.core.algorithms import take # noqa -from pandas.core.arrays import (ExtensionArray, # noqa +from pandas.core.algorithms import take # noqa: F401 +from pandas.core.arrays import (ExtensionArray, # noqa: F401 ExtensionScalarOpsMixin) -from pandas.core.dtypes.dtypes import ( # noqa - ExtensionDtype, register_extension_dtype +from pandas.core.dtypes.dtypes import ( # noqa: F401 + ExtensionDtype, register_extension_dtype, +) +# ExtensionArrays not publicly exposed elsewhere +from pandas.core.arrays import ( # noqa: F401 + IntegerArray, PeriodArray, ) diff --git a/pandas/core/api.py b/pandas/core/api.py index ad35b647ac458..852f5e28cbe00 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -5,8 +5,11 @@ import numpy as np from pandas.core.algorithms import factorize, unique, value_counts +from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.missing import isna, isnull, notna, notnull -from pandas.core.arrays import Categorical +from pandas.core.arrays import (Categorical, period_array, integer_array, + IntervalArray, SparseArray) +from pandas.core.arrays.sparse import SparseDtype from pandas.core.groupby import Grouper from pandas.io.formats.format import set_eng_float_format from pandas.core.index import (Index, CategoricalIndex, Int64Index, diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 4363f3ccb14e2..8091552f04683 100644 --- a/pandas/core/arrays/categorical.py +++ 
b/pandas/core/arrays/categorical.py @@ -315,7 +315,7 @@ class Categorical(ExtensionArray, PandasObject): See also -------- - pandas.api.types.CategoricalDtype : Type for categorical data + CategoricalDtype : Type for categorical data CategoricalIndex : An Index with an underlying ``Categorical`` """ @@ -484,7 +484,7 @@ def ordered(self): @property def dtype(self): - """The :class:`~pandas.api.types.CategoricalDtype` for this instance""" + """The :class:`CategoricalDtype` for this instance""" return self._dtype @property diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 94be29893d2b9..a791a81b2d943 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -81,7 +81,9 @@ from_arrays from_tuples from_breaks +overlaps set_closed +to_tuples %(extra_methods)s\ %(examples)s\ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index ea7eeb7fc9f8e..5e4029f63a554 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -146,8 +146,38 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): See Also -------- - period_array : Create a new PeriodArray + pandas.period_array : Create a new PeriodArray pandas.PeriodIndex : Immutable Index for period data + + Attributes + ---------- + day + dayofweek + dayofyear + days_in_month + daysinmonth + end_time + freq + freqstr + hour + is_leap_year + minute + month + quarter + qyear + second + start_time + week + weekday + weekofyear + year + + Methods + ------- + asfreq + shift + strftime + to_timestamp """ _attributes = ["freq"] _typ = "periodarray" # ABCPeriodArray @@ -583,7 +613,7 @@ def to_timestamp(self, freq=None, how='start'): ------- DatetimeArray/Index """ - from pandas.core.arrays import DatetimeArrayMixin + from pandas import DatetimeIndex how = libperiod._validate_end_alias(how) @@ -607,7 +637,7 @@ def to_timestamp(self, freq=None, how='start'): new_data = self.asfreq(freq, how=how) new_data = 
libperiod.periodarr_to_dt64arr(new_data.asi8, base) - return DatetimeArrayMixin(new_data, freq='infer') + return DatetimeIndex(new_data, freq='infer') # ------------------------------------------------------------------ # Formatting @@ -633,7 +663,7 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): def repeat(self, repeats, *args, **kwargs): """ - Repeat elements of a Categorical. + Repeat elements of a PeriodArray. See also -------- @@ -891,7 +921,7 @@ def period_array(data, freq=None, copy=False): See Also -------- - PeriodArray + pandas.api.extensions.PeriodArray pandas.PeriodIndex Examples diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index da894a0881400..97a4e698c556f 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -45,7 +45,8 @@ class TestPDApi(Base): 'Period', 'PeriodIndex', 'RangeIndex', 'UInt64Index', 'Series', 'SparseArray', 'SparseDataFrame', 'SparseDtype', 'SparseSeries', 'Timedelta', - 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex'] + 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex', + 'IntervalArray', 'CategoricalDtype'] # these are already deprecated; awaiting removal deprecated_classes = ['TimeGrouper'] @@ -58,12 +59,12 @@ class TestPDApi(Base): # top-level functions funcs = ['bdate_range', 'concat', 'crosstab', 'cut', - 'date_range', 'interval_range', 'eval', + 'date_range', 'integer_array', 'interval_range', 'eval', 'factorize', 'get_dummies', 'infer_freq', 'isna', 'isnull', 'lreshape', 'melt', 'notna', 'notnull', 'offsets', 'merge', 'merge_ordered', 'merge_asof', - 'period_range', + 'period_array', 'period_range', 'pivot', 'pivot_table', 'qcut', 'show_versions', 'timedelta_range', 'unique', 'value_counts', 'wide_to_long']