diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index 608e2c8e72ded..7f0cfbbd2414a 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -717,11 +717,9 @@ and allows efficient indexing and storage of an index with a large number of dup
.. ipython:: python
- from pandas.api.types import CategoricalDtype
-
df = pd.DataFrame({'A': np.arange(6),
'B': list('aabbca')})
- df['B'] = df['B'].astype(CategoricalDtype(list('cab')))
+ df['B'] = df['B'].astype(pd.CategoricalDtype(list('cab')))
df
df.dtypes
df.B.cat.categories
@@ -747,7 +745,7 @@ The ``CategoricalIndex`` is **preserved** after indexing:
df2.loc['a'].index
Sorting the index will sort by the order of the categories (recall that we
-created the index with ``CategoricalDtype(list('cab'))``, so the sorted
+created the index with ``pd.CategoricalDtype(list('cab'))``, so the sorted
order is ``cab``).
.. ipython:: python
diff --git a/doc/source/api.rst b/doc/source/api.rst
index 665649aead33c..60f3f078e2e75 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -702,93 +702,6 @@ strings and apply several methods to it. These can be accessed like
Series.dt
Index.str
-.. _api.categorical:
-
-Categorical
-~~~~~~~~~~~
-
-Pandas defines a custom data type for representing data that can take only a
-limited, fixed set of values. The dtype of a ``Categorical`` can be described by
-a :class:`pandas.api.types.CategoricalDtype`.
-
-.. autosummary::
- :toctree: generated/
- :template: autosummary/class_without_autosummary.rst
-
- api.types.CategoricalDtype
-
-.. autosummary::
- :toctree: generated/
-
- api.types.CategoricalDtype.categories
- api.types.CategoricalDtype.ordered
-
-Categorical data can be stored in a :class:`pandas.Categorical`
-
-.. autosummary::
- :toctree: generated/
- :template: autosummary/class_without_autosummary.rst
-
- Categorical
-
-
-The alternative :meth:`Categorical.from_codes` constructor can be used when you
-have the categories and integer codes already:
-
-.. autosummary::
- :toctree: generated/
-
- Categorical.from_codes
-
-The dtype information is available on the ``Categorical``
-
-.. autosummary::
- :toctree: generated/
-
- Categorical.dtype
- Categorical.categories
- Categorical.ordered
- Categorical.codes
-
-``np.asarray(categorical)`` works by implementing the array interface. Be aware, that this converts
-the Categorical back to a NumPy array, so categories and order information is not preserved!
-
-.. autosummary::
- :toctree: generated/
-
- Categorical.__array__
-
-A ``Categorical`` can be stored in a ``Series`` or ``DataFrame``.
-To create a Series of dtype ``category``, use ``cat = s.astype(dtype)`` or
-``Series(..., dtype=dtype)`` where ``dtype`` is either
-
-* the string ``'category'``
-* an instance of :class:`~pandas.api.types.CategoricalDtype`.
-
-If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical
-data. This accessor is similar to the ``Series.dt`` or ``Series.str`` and has the
-following usable methods and properties:
-
-.. autosummary::
- :toctree: generated/
- :template: autosummary/accessor_attribute.rst
-
- Series.cat.categories
- Series.cat.ordered
- Series.cat.codes
-
-.. autosummary::
- :toctree: generated/
- :template: autosummary/accessor_method.rst
-
- Series.cat.rename_categories
- Series.cat.reorder_categories
- Series.cat.add_categories
- Series.cat.remove_categories
- Series.cat.remove_unused_categories
- Series.cat.set_categories
- Series.cat.as_ordered
- Series.cat.as_unordered
Plotting
~~~~~~~~
@@ -842,29 +755,6 @@ Serialization / IO / Conversion
Series.to_clipboard
Series.to_latex
-Sparse
-~~~~~~
-.. autosummary::
- :toctree: generated/
-
- SparseSeries.to_coo
- SparseSeries.from_coo
-
-.. autosummary::
- :toctree: generated/
- :template: autosummary/accessor_attribute.rst
-
- Series.sparse.npoints
- Series.sparse.density
- Series.sparse.fill_value
- Series.sparse.sp_values
-
-
-.. autosummary::
- :toctree: generated/
-
- Series.sparse.from_coo
- Series.sparse.to_coo
.. _api.dataframe:
@@ -1675,7 +1565,7 @@ IntervalIndex Components
IntervalIndex.get_indexer
IntervalIndex.set_closed
IntervalIndex.overlaps
-
+ IntervalIndex.to_tuples
.. _api.multiindex:
@@ -2567,6 +2457,256 @@ Exceptions and warnings
errors.UnsortedIndexError
errors.UnsupportedFunctionCall
+.. _api.extension:
+
+Extension Types
+---------------
+
+Pandas implements several :class:`api.extensions.ExtensionArray` types.
+These arrays may be placed in Series or a column of a DataFrame. Some
+may also have a specialized index type.
+
+.. _api.categorical:
+
+Categorical
+~~~~~~~~~~~
+
+Pandas defines a custom data type for representing data that can take only a
+limited, fixed set of values. The dtype of a ``Categorical`` can be described by
+a :class:`CategoricalDtype`.
+See :ref:`categorical` for more on working with Categorical data.
+
+.. autosummary::
+ :toctree: generated/
+ :template: autosummary/class_without_autosummary.rst
+
+ CategoricalDtype
+
+.. autosummary::
+ :toctree: generated/
+
+ CategoricalDtype.categories
+ CategoricalDtype.ordered
+
+Categorical data can be stored in a :class:`pandas.Categorical`
+
+.. autosummary::
+ :toctree: generated/
+ :template: autosummary/class_without_autosummary.rst
+
+ Categorical
+
+
+The alternative :meth:`Categorical.from_codes` constructor can be used when you
+have the categories and integer codes already:
+
+.. autosummary::
+ :toctree: generated/
+
+ Categorical.from_codes
+
+The dtype of a ``Categorical`` is a :class:`CategoricalDtype`. The dtype
+stores the actual categories, and whether the categories are ordered.
+
+.. autosummary::
+ :toctree: generated/
+
+ CategoricalDtype
+
+The dtype information is also available directly on the ``Categorical``.
+
+.. autosummary::
+ :toctree: generated/
+
+ Categorical.dtype
+ Categorical.categories
+ Categorical.ordered
+ Categorical.codes
+
+``np.asarray(categorical)`` works by implementing the array interface. Be aware that this converts
+the Categorical back to a NumPy array, so categories and order information are not preserved!
+
+.. autosummary::
+ :toctree: generated/
+
+ Categorical.__array__
+
+A ``Categorical`` can be stored in a ``Series`` or ``DataFrame``.
+To create a Series of dtype ``category``, use ``cat = s.astype(dtype)`` or
+``Series(..., dtype=dtype)`` where ``dtype`` is either
+
+* the string ``'category'``
+* an instance of :class:`CategoricalDtype`.
+
+If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical
+data. This accessor is similar to the ``Series.dt`` or ``Series.str`` and has the
+following usable methods and properties:
+
+.. autosummary::
+ :toctree: generated/
+ :template: autosummary/accessor_attribute.rst
+
+ Series.cat.categories
+ Series.cat.ordered
+ Series.cat.codes
+
+.. autosummary::
+ :toctree: generated/
+ :template: autosummary/accessor_method.rst
+
+ Series.cat.rename_categories
+ Series.cat.reorder_categories
+ Series.cat.add_categories
+ Series.cat.remove_categories
+ Series.cat.remove_unused_categories
+ Series.cat.set_categories
+ Series.cat.as_ordered
+ Series.cat.as_unordered
+
+.. _api.integer:
+
+Integer-NA
+~~~~~~~~~~
+
+:class:`api.extensions.IntegerArray` can be used to store integer-value data
+that may contain missing values. This works around the consequence of using
+:attr:`numpy.nan`, a floating point value, as a missing value sentinel.
+See :ref:`integer_na` for more.
+
+.. autosummary::
+ :toctree: generated/
+
+ integer_array
+
+.. autosummary::
+ :toctree: generated/
+
+ api.extensions.IntegerArray
+
+.. _api.period:
+
+Period
+~~~~~~
+
+:class:`api.extensions.PeriodArray` is an array for data representing a time span.
+The scalar type is a :class:`Period`. These may be stored in a :class:`Series`
+or as a :class:`PeriodIndex`. :func:`period_array` should be used to create a
+new :class:`api.extensions.PeriodArray`.
+
+.. autosummary::
+ :toctree: generated/
+
+ period_array
+
+.. autosummary::
+ :toctree: generated/
+ :template: autosummary/class_without_autosummary.rst
+
+ api.extensions.PeriodArray
+
+.. autosummary::
+ :toctree: generated/
+
+ api.extensions.PeriodArray.day
+ api.extensions.PeriodArray.dayofweek
+ api.extensions.PeriodArray.dayofyear
+ api.extensions.PeriodArray.days_in_month
+ api.extensions.PeriodArray.daysinmonth
+ api.extensions.PeriodArray.end_time
+ api.extensions.PeriodArray.freq
+ api.extensions.PeriodArray.freqstr
+ api.extensions.PeriodArray.hour
+ api.extensions.PeriodArray.is_leap_year
+ api.extensions.PeriodArray.minute
+ api.extensions.PeriodArray.month
+ api.extensions.PeriodArray.quarter
+ api.extensions.PeriodArray.qyear
+ api.extensions.PeriodArray.second
+ api.extensions.PeriodArray.start_time
+ api.extensions.PeriodArray.week
+ api.extensions.PeriodArray.weekday
+ api.extensions.PeriodArray.weekofyear
+ api.extensions.PeriodArray.year
+ api.extensions.PeriodArray.asfreq
+ api.extensions.PeriodArray.shift
+ api.extensions.PeriodArray.strftime
+ api.extensions.PeriodArray.to_timestamp
+
+.. _api.interval:
+
+Interval
+~~~~~~~~
+
+:class:`IntervalArray` is an array for storing data representing intervals.
+The scalar type is an :class:`Interval`. These may be stored in a :class:`Series`
+or as an :class:`IntervalIndex`. An :class:`IntervalArray` can be closed on the
+left side, the right side, both sides, or neither side.
+
+.. currentmodule:: pandas
+
+.. autosummary::
+ :toctree: generated/
+ :template: autosummary/class_without_autosummary.rst
+
+
+ IntervalArray
+
+.. autosummary::
+ :toctree: generated/
+
+ IntervalArray.from_arrays
+ IntervalArray.from_tuples
+ IntervalArray.from_breaks
+ IntervalArray.contains
+ IntervalArray.left
+ IntervalArray.right
+ IntervalArray.mid
+ IntervalArray.closed
+ IntervalArray.length
+ IntervalArray.values
+ IntervalArray.is_non_overlapping_monotonic
+ IntervalArray.set_closed
+ IntervalArray.overlaps
+ IntervalArray.to_tuples
+
+
+.. _api.sparse:
+
+Sparse
+~~~~~~
+
+:class:`SparseArray` is an array for efficiently storing data with a commonly
+repeated ``fill_value``. See :ref:`sparse` for more.
+
+.. autosummary::
+ :toctree: generated/
+
+ SparseArray
+
+
+A ``.sparse`` accessor is available on Series with Sparse data.
+
+.. autosummary::
+ :toctree: generated/
+ :template: autosummary/accessor_attribute.rst
+
+ Series.sparse.npoints
+ Series.sparse.density
+ Series.sparse.fill_value
+ Series.sparse.sp_values
+
+.. autosummary::
+ :toctree: generated/
+
+ Series.sparse.from_coo
+ Series.sparse.to_coo
+
+.. autosummary::
+ :toctree: generated/
+
+ SparseSeries.to_coo
+ SparseSeries.from_coo
+
Data types related functionality
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2684,6 +2824,29 @@ objects.
generated/pandas.Series.ix
generated/pandas.Series.imag
generated/pandas.Series.real
+ generated/pandas.IntervalArray.argsort
+ generated/pandas.IntervalArray.astype
+ generated/pandas.IntervalArray.can_hold_na
+ generated/pandas.IntervalArray.closed_left
+ generated/pandas.IntervalArray.closed_right
+ generated/pandas.IntervalArray.copy
+ generated/pandas.IntervalArray.dropna
+ generated/pandas.IntervalArray.dtype
+ generated/pandas.IntervalArray.factorize
+ generated/pandas.IntervalArray.fillna
+ generated/pandas.IntervalArray.isna
+ generated/pandas.IntervalArray.nbytes
+ generated/pandas.IntervalArray.ndim
+ generated/pandas.IntervalArray.open_left
+ generated/pandas.IntervalArray.open_right
+ generated/pandas.IntervalArray.repeat
+ generated/pandas.IntervalArray.shape
+ generated/pandas.IntervalArray.shift
+ generated/pandas.IntervalArray.size
+ generated/pandas.IntervalArray.take
+ generated/pandas.IntervalArray.unique
+ generated/pandas.IntervalArray.value_counts
+ generated/pandas.SparseArray.nonzero
.. Can't convince sphinx to generate toctree for this class attribute.
diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst
index acab9de905540..9d7bdc0c9e87f 100644
--- a/doc/source/categorical.rst
+++ b/doc/source/categorical.rst
@@ -150,11 +150,11 @@ behavior:
2. Categories are unordered.
To control those behaviors, instead of passing ``'category'``, use an instance
-of :class:`~pandas.api.types.CategoricalDtype`.
+of :class:`CategoricalDtype`.
.. ipython:: python
- from pandas.api.types import CategoricalDtype
+ from pandas import CategoricalDtype
s = pd.Series(["a", "b", "c", "a"])
cat_type = CategoricalDtype(categories=["b", "c", "d"],
@@ -227,7 +227,7 @@ A categorical's type is fully described by
1. ``categories``: a sequence of unique values and no missing values
2. ``ordered``: a boolean
-This information can be stored in a :class:`~pandas.api.types.CategoricalDtype`.
+This information can be stored in a :class:`CategoricalDtype`.
The ``categories`` argument is optional, which implies that the actual categories
should be inferred from whatever is present in the data when the
:class:`pandas.Categorical` is created. The categories are assumed to be unordered
@@ -235,20 +235,20 @@ by default.
.. ipython:: python
- from pandas.api.types import CategoricalDtype
+ from pandas import CategoricalDtype
CategoricalDtype(['a', 'b', 'c'])
CategoricalDtype(['a', 'b', 'c'], ordered=True)
CategoricalDtype()
-A :class:`~pandas.api.types.CategoricalDtype` can be used in any place pandas
+A :class:`CategoricalDtype` can be used in any place pandas
expects a `dtype`. For example :func:`pandas.read_csv`,
:func:`pandas.DataFrame.astype`, or in the ``Series`` constructor.
.. note::
As a convenience, you can use the string ``'category'`` in place of a
- :class:`~pandas.api.types.CategoricalDtype` when you want the default behavior of
+ :class:`CategoricalDtype` when you want the default behavior of
the categories being unordered, and equal to the set values present in the
array. In other words, ``dtype='category'`` is equivalent to
``dtype=CategoricalDtype()``.
@@ -256,7 +256,7 @@ expects a `dtype`. For example :func:`pandas.read_csv`,
Equality Semantics
~~~~~~~~~~~~~~~~~~
-Two instances of :class:`~pandas.api.types.CategoricalDtype` compare equal
+Two instances of :class:`CategoricalDtype` compare equal
whenever they have the same categories and order. When comparing two
unordered categoricals, the order of the ``categories`` is not considered.
@@ -834,7 +834,7 @@ Unioning
.. versionadded:: 0.19.0
If you want to combine categoricals that do not necessarily have the same
-categories, the :func:`~pandas.api.types.union_categoricals` function will
+categories, the :func:`~pandas.api.types.union_categoricals` function will
combine a list-like of categoricals. The new categories will be the union of
the categories being combined.
@@ -884,7 +884,7 @@ using the ``ignore_ordered=True`` argument.
b = pd.Categorical(["c", "b", "a"], ordered=True)
union_categoricals([a, b], ignore_order=True)
-:func:`~pandas.api.types.union_categoricals` also works with a
+:func:`~pandas.api.types.union_categoricals` also works with a
``CategoricalIndex``, or ``Series`` containing categorical data, but note that
the resulting array will always be a plain ``Categorical``:
diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst
index d02912294060c..93442cf4a6632 100644
--- a/doc/source/dsintro.rst
+++ b/doc/source/dsintro.rst
@@ -1047,3 +1047,31 @@ Alternatively, one can convert to an xarray ``DataArray``.
p.to_xarray()
You can see the full-documentation for the `xarray package <http://xarray.pydata.org/en/stable/>`__.
+
+.. _dsintro.data_type:
+
+Data Types
+----------
+
+Every :class:`Index`, :class:`Series`, and column of a :class:`DataFrame` has a data type.
+The data type or types are available with :attr:`Index.dtype`, :attr:`Series.dtype` and
+:attr:`DataFrame.dtypes` (one dtype per column).
+
+For the most part, pandas uses NumPy arrays and dtypes. Pandas has made a few extensions
+to NumPy for types that are especially important for tabular data analysis.
+
+===================================== ==============================
+Array Type Documentation
+===================================== ==============================
+:class:`Categorical` :ref:`categorical`
+:class:`DatetimeArray` TODO
+:class:`api.extensions.IntegerArray` :ref:`integer_na`
+:class:`IntervalArray` :ref:`indexing.intervallindex`
+:class:`api.extensions.PeriodArray` :ref:`timeseries.periods`
+:class:`SparseArray` :ref:`sparse`
+===================================== ==============================
+
+If you need to convert one of these arrays to a NumPy array, use :func:`numpy.asarray`.
+This will necessarily lose the dtype information (since NumPy can't represent these
+types) but will preserve equality (values that were equal in the pandas extension
+array will still be equal in the NumPy array).
diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template
index d2b88e794e51e..07cd992d74e18 100644
--- a/doc/source/index.rst.template
+++ b/doc/source/index.rst.template
@@ -139,6 +139,7 @@ See the package overview for more detail about what's in the library.
timeseries
timedeltas
categorical
+ integer_na
visualization
style
io
diff --git a/doc/source/integer_na.rst b/doc/source/integer_na.rst
new file mode 100644
index 0000000000000..029e48a77092d
--- /dev/null
+++ b/doc/source/integer_na.rst
@@ -0,0 +1,73 @@
+.. currentmodule:: pandas
+
+.. ipython:: python
+ :suppress:
+
+ import numpy as np
+ import pandas as pd
+
+.. _integer_na:
+
+********************************
+Integer Data with Missing Values
+********************************
+
+.. versionadded:: 0.24.0
+
+In :ref:`missing_data`, we say that pandas primarily uses ``NaN`` to represent
+missing data. The most unfortunate consequence of this is that, because
+``NaN`` is a float, an array of integers with missing values will have float
+dtype.
+
+Pandas can represent integer data with missing values with an
+:class:`api.extensions.IntegerArray`. This is an :ref:`extension type <extending.extension-types>`
+implemented within pandas. It is not the default dtype and will not be inferred;
+you must explicitly create an :class:`api.extensions.IntegerArray` using :func:`integer_array`.
+
+.. ipython:: python
+
+ arr = integer_array([1, 2, np.nan])
+ arr
+
+This array can be stored in a :class:`DataFrame` or :class:`Series` like any
+NumPy array.
+
+.. ipython:: python
+
+ pd.Series(arr)
+
+Alternatively, you can instruct pandas to treat an array-like as an
+:class:`api.extensions.IntegerArray` by specifying a dtype with a capital "I".
+
+.. ipython:: python
+
+ s = pd.Series([1, 2, np.nan], dtype="Int64")
+ s
+
+Operations involving an integer array will behave similarly to NumPy arrays.
+Missing values will be propagated, and the data will be coerced to another
+dtype if needed.
+
+.. ipython:: python
+
+ # arithmetic
+ s + 1
+
+ # comparison
+ s == 1
+
+ # indexing
+ s.iloc[1:3]
+
+ # operate with other dtypes
+ s + s.iloc[1:3].astype('Int8')
+
+ # coerce when needed
+ s + 0.01
+
+Reduction and groupby operations such as 'sum' work as well.
+
+.. ipython:: python
+
+ df = pd.DataFrame({'A': s, 'B': [1, 1, 3]})
+ df.sum()
+ df.groupby('B').A.sum()
diff --git a/doc/source/io.rst b/doc/source/io.rst
index 68faefa872c88..acd726e0f6cc4 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -451,7 +451,7 @@ Specifying Categorical dtype
.. versionadded:: 0.19.0
``Categorical`` columns can be parsed directly by specifying ``dtype='category'`` or
-``dtype=CategoricalDtype(categories, ordered)``.
+``dtype=pd.CategoricalDtype(categories, ordered)``.
.. ipython:: python
@@ -473,14 +473,12 @@ specification:
Specifying ``dtype='cateogry'`` will result in an unordered ``Categorical``
whose ``categories`` are the unique values observed in the data. For more
control on the categories and order, create a
-:class:`~pandas.api.types.CategoricalDtype` ahead of time, and pass that for
+:class:`CategoricalDtype` ahead of time, and pass that for
that column's ``dtype``.
.. ipython:: python
- from pandas.api.types import CategoricalDtype
-
- dtype = CategoricalDtype(['d', 'c', 'b', 'a'], ordered=True)
+ dtype = pd.CategoricalDtype(['d', 'c', 'b', 'a'], ordered=True)
pd.read_csv(StringIO(data), dtype={'col1': dtype}).dtypes
When using ``dtype=CategoricalDtype``, "unexpected" values outside of
@@ -488,7 +486,7 @@ When using ``dtype=CategoricalDtype``, "unexpected" values outside of
.. ipython:: python
- dtype = CategoricalDtype(['a', 'b', 'd']) # No 'c'
+ dtype = pd.CategoricalDtype(['a', 'b', 'd']) # No 'c'
pd.read_csv(StringIO(data), dtype={'col1': dtype}).col1
This matches the behavior of :meth:`Categorical.set_categories`.
diff --git a/doc/source/merging.rst b/doc/source/merging.rst
index 98914c13d4d31..477f533db2383 100644
--- a/doc/source/merging.rst
+++ b/doc/source/merging.rst
@@ -858,10 +858,8 @@ The left frame.
.. ipython:: python
- from pandas.api.types import CategoricalDtype
-
X = pd.Series(np.random.choice(['foo', 'bar'], size=(10,)))
- X = X.astype(CategoricalDtype(categories=['foo', 'bar']))
+ X = X.astype(pd.CategoricalDtype(categories=['foo', 'bar']))
left = pd.DataFrame({'X': X,
'Y': np.random.choice(['one', 'two', 'three'], size=(10,))})
@@ -874,7 +872,7 @@ The right frame.
right = pd.DataFrame({
'X': pd.Series(['foo', 'bar'],
- dtype=CategoricalDtype(['foo', 'bar'])),
+ dtype=pd.CategoricalDtype(['foo', 'bar'])),
'Z': [1, 2]
})
right
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index a52c80106f100..a34c069039a13 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -88,14 +88,14 @@ pandas captures 4 general time related concepts:
#. Time spans: A span of time defined by a point in time and its associated frequency.
#. Date offsets: A relative time duration that respects calendar arithmetic. Similar to ``dateutil.relativedelta.relativedelta`` from the ``dateutil`` package.
-===================== ================= =================== ============================================ ========================================
+===================== ================= =================== ============================================ ================================================
Concept Scalar Class Array Class pandas Data Type Primary Creation Method
-===================== ================= =================== ============================================ ========================================
+===================== ================= =================== ============================================ ================================================
Date times ``Timestamp`` ``DatetimeIndex`` ``datetime64[ns]`` or ``datetime64[ns, tz]`` ``to_datetime`` or ``date_range``
Time deltas ``Timedelta`` ``TimedeltaIndex`` ``timedelta64[ns]`` ``to_timedelta`` or ``timedelta_range``
-Time spans ``Period`` ``PeriodIndex`` ``period[freq]`` ``Period`` or ``period_range``
+Time spans ``Period`` ``PeriodIndex`` ``period[freq]`` ``Period``, ``period_range`` or ``period_array``
Date offsets ``DateOffset`` ``None`` ``None`` ``DateOffset``
-===================== ================= =================== ============================================ ========================================
+===================== ================= =================== ============================================ ================================================
For time series data, it's conventional to represent the time component in the index of a :class:`Series` or :class:`DataFrame`
so manipulations can be performed with respect to the time element.
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 33d45f8d4444d..f8dd1a92b4ed3 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -22,6 +22,31 @@ dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
- :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`)
+.. _whatsnew_0240.enhancements.extension_api:
+
+Pandas Extension Types
+^^^^^^^^^^^^^^^^^^^^^^
+
+All of pandas' internal extension types (datetime with timezone, categorical,
+period, interval, sparse) now follow the public Extension Array API. The
+following methods and classes have been added to the public API for working with
+these types.
+
+**dtypes**
+
+- :class:`CategoricalDtype`
+
+**Array Constructors**
+
+- :func:`period_array`
+- :func:`integer_array`
+
+**Array Classes**
+
+- :class:`IntervalArray`
+- :class:`api.extensions.PeriodArray`
+- :class:`api.extensions.IntegerArray`
+
.. _whatsnew_0240.enhancements.extension_array_operators:
``ExtensionArray`` operator support
@@ -99,7 +124,9 @@ Reduction and groupby operations such as 'sum' work.
.. warning::
- The Integer NA support currently uses the captilized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date.
+ The Integer NA support currently uses the capitalized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date.
+
+See :ref:`integer_na` for more.
.. _whatsnew_0240.enhancements.read_html:
diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py
index 51555c57b2288..05afe41a1fb83 100644
--- a/pandas/api/extensions/__init__.py
+++ b/pandas/api/extensions/__init__.py
@@ -1,10 +1,14 @@
"""Public API for extending panadas objects."""
-from pandas.core.accessor import (register_dataframe_accessor, # noqa
+from pandas.core.accessor import (register_dataframe_accessor, # noqa: F401
register_index_accessor,
register_series_accessor)
-from pandas.core.algorithms import take # noqa
-from pandas.core.arrays import (ExtensionArray, # noqa
+from pandas.core.algorithms import take # noqa: F401
+from pandas.core.arrays import (ExtensionArray, # noqa: F401
ExtensionScalarOpsMixin)
-from pandas.core.dtypes.dtypes import ( # noqa
- ExtensionDtype, register_extension_dtype
+from pandas.core.dtypes.dtypes import ( # noqa: F401
+ ExtensionDtype, register_extension_dtype,
+)
+# ExtensionArrays not publicly exposed elsewhere
+from pandas.core.arrays import ( # noqa: F401
+ IntegerArray, PeriodArray,
)
diff --git a/pandas/core/api.py b/pandas/core/api.py
index ad35b647ac458..852f5e28cbe00 100644
--- a/pandas/core/api.py
+++ b/pandas/core/api.py
@@ -5,8 +5,11 @@
import numpy as np
from pandas.core.algorithms import factorize, unique, value_counts
+from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.missing import isna, isnull, notna, notnull
-from pandas.core.arrays import Categorical
+from pandas.core.arrays import (Categorical, period_array, integer_array,
+ IntervalArray, SparseArray)
+from pandas.core.arrays.sparse import SparseDtype
from pandas.core.groupby import Grouper
from pandas.io.formats.format import set_eng_float_format
from pandas.core.index import (Index, CategoricalIndex, Int64Index,
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 4363f3ccb14e2..8091552f04683 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -315,7 +315,7 @@ class Categorical(ExtensionArray, PandasObject):
See also
--------
- pandas.api.types.CategoricalDtype : Type for categorical data
+ CategoricalDtype : Type for categorical data
CategoricalIndex : An Index with an underlying ``Categorical``
"""
@@ -484,7 +484,7 @@ def ordered(self):
@property
def dtype(self):
- """The :class:`~pandas.api.types.CategoricalDtype` for this instance"""
+ """The :class:`CategoricalDtype` for this instance"""
return self._dtype
@property
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index 94be29893d2b9..a791a81b2d943 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -81,7 +81,9 @@
from_arrays
from_tuples
from_breaks
+overlaps
set_closed
+to_tuples
%(extra_methods)s\
%(examples)s\
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index ea7eeb7fc9f8e..5e4029f63a554 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -146,8 +146,38 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray):
See Also
--------
- period_array : Create a new PeriodArray
+ pandas.period_array : Create a new PeriodArray
pandas.PeriodIndex : Immutable Index for period data
+
+ Attributes
+ ----------
+ day
+ dayofweek
+ dayofyear
+ days_in_month
+ daysinmonth
+ end_time
+ freq
+ freqstr
+ hour
+ is_leap_year
+ minute
+ month
+ quarter
+ qyear
+ second
+ start_time
+ week
+ weekday
+ weekofyear
+ year
+
+ Methods
+ -------
+ asfreq
+ shift
+ strftime
+ to_timestamp
"""
_attributes = ["freq"]
_typ = "periodarray" # ABCPeriodArray
@@ -583,7 +613,7 @@ def to_timestamp(self, freq=None, how='start'):
-------
DatetimeArray/Index
"""
- from pandas.core.arrays import DatetimeArrayMixin
+ from pandas import DatetimeIndex
how = libperiod._validate_end_alias(how)
@@ -607,7 +637,7 @@ def to_timestamp(self, freq=None, how='start'):
new_data = self.asfreq(freq, how=how)
new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base)
- return DatetimeArrayMixin(new_data, freq='infer')
+ return DatetimeIndex(new_data, freq='infer')
# ------------------------------------------------------------------
# Formatting
@@ -633,7 +663,7 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
def repeat(self, repeats, *args, **kwargs):
"""
- Repeat elements of a Categorical.
+ Repeat elements of a PeriodArray.
See also
--------
@@ -891,7 +921,7 @@ def period_array(data, freq=None, copy=False):
See Also
--------
- PeriodArray
+ pandas.api.extensions.PeriodArray
pandas.PeriodIndex
Examples
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
index da894a0881400..97a4e698c556f 100644
--- a/pandas/tests/api/test_api.py
+++ b/pandas/tests/api/test_api.py
@@ -45,7 +45,8 @@ class TestPDApi(Base):
'Period', 'PeriodIndex', 'RangeIndex', 'UInt64Index',
'Series', 'SparseArray', 'SparseDataFrame', 'SparseDtype',
'SparseSeries', 'Timedelta',
- 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex']
+ 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex',
+ 'IntervalArray', 'CategoricalDtype']
# these are already deprecated; awaiting removal
deprecated_classes = ['TimeGrouper']
@@ -58,12 +59,12 @@ class TestPDApi(Base):
# top-level functions
funcs = ['bdate_range', 'concat', 'crosstab', 'cut',
- 'date_range', 'interval_range', 'eval',
+ 'date_range', 'integer_array', 'interval_range', 'eval',
'factorize', 'get_dummies',
'infer_freq', 'isna', 'isnull', 'lreshape',
'melt', 'notna', 'notnull', 'offsets',
'merge', 'merge_ordered', 'merge_asof',
- 'period_range',
+ 'period_array', 'period_range',
'pivot', 'pivot_table', 'qcut',
'show_versions', 'timedelta_range', 'unique',
'value_counts', 'wide_to_long']