Skip to content

Commit 90140f0

Browse files
authored
ENH: Implement DataFrame interchange protocol (#46141)
1 parent 4eb4be2 commit 90140f0

18 files changed

+2121
-1
lines changed

doc/source/reference/frame.rst

+1
Original file line numberDiff line numberDiff line change
@@ -391,3 +391,4 @@ Serialization / IO / conversion
391391
DataFrame.to_clipboard
392392
DataFrame.to_markdown
393393
DataFrame.style
394+
DataFrame.__dataframe__

doc/source/reference/general_functions.rst

+7
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,10 @@ Hashing
7878

7979
util.hash_array
8080
util.hash_pandas_object
81+
82+
Importing from other DataFrame libraries
83+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
84+
.. autosummary::
85+
:toctree: api/
86+
87+
api.exchange.from_dataframe

doc/source/whatsnew/v1.5.0.rst

+18
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,24 @@ including other versions of pandas.
1414
Enhancements
1515
~~~~~~~~~~~~
1616

17+
.. _whatsnew_150.enhancements.dataframe_exchange:
18+
19+
DataFrame exchange protocol implementation
20+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
21+
22+
Pandas now implements the DataFrame exchange API spec.
23+
See the full details on the API at https://data-apis.org/dataframe-protocol/latest/index.html
24+
25+
The protocol consists of two parts:
26+
27+
- New method :meth:`DataFrame.__dataframe__` which produces the exchange object.
28+
It effectively "exports" the Pandas dataframe as an exchange object so
29+
any other library which has the protocol implemented can "import" that dataframe
30+
without knowing anything about the producer except that it makes an exchange object.
31+
- New function :func:`pandas.api.exchange.from_dataframe` which can take
32+
an arbitrary exchange object from any conformant library and construct a
33+
Pandas DataFrame out of it.
34+
1735
.. _whatsnew_150.enhancements.styler:
1836

1937
Styler

pandas/api/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
""" public toolkit API """
22
from pandas.api import ( # noqa:F401
3+
exchange,
34
extensions,
45
indexers,
56
types,

pandas/api/exchange/__init__.py

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"""
2+
Public API for DataFrame exchange protocol.
3+
"""
4+
5+
from pandas.core.exchange.dataframe_protocol import DataFrame
6+
from pandas.core.exchange.from_dataframe import from_dataframe
7+
8+
__all__ = ["from_dataframe", "DataFrame"]

pandas/core/exchange/__init__.py

Whitespace-only changes.

pandas/core/exchange/buffer.py

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
from typing import (
2+
Optional,
3+
Tuple,
4+
)
5+
6+
import numpy as np
7+
from packaging import version
8+
9+
from pandas.core.exchange.dataframe_protocol import (
10+
Buffer,
11+
DlpackDeviceType,
12+
)
13+
14+
# True when the installed NumPy is at least 1.22.0; PandasBuffer.__dlpack__
# only delegates to ``ndarray.__dlpack__`` when this flag is set.
_NUMPY_HAS_DLPACK = version.parse(np.__version__) >= version.parse("1.22.0")
15+
16+
17+
class PandasBuffer(Buffer):
    """
    Buffer backed by a NumPy array.

    The data held by the buffer is guaranteed to be contiguous in memory.
    """

    def __init__(self, x: np.ndarray, allow_copy: bool = True) -> None:
        """
        Wrap a NumPy array; only regular columns (NumPy arrays) are handled.

        Parameters
        ----------
        x : np.ndarray
            Array holding the data to expose.
        allow_copy : bool, default True
            Whether a copy may be made when ``x`` is strided.

        Raises
        ------
        RuntimeError
            If ``x`` is strided and ``allow_copy`` is False.
        """
        is_contiguous = x.strides == (x.dtype.itemsize,)
        if not is_contiguous:
            # Strided buffers are not supported by the protocol, so the only
            # way to export such data is through a copy. When copying is
            # forbidden there is nothing left to do but fail.
            if not allow_copy:
                raise RuntimeError(
                    "Exports cannot be zero-copy in the case "
                    "of a non-contiguous buffer"
                )
            x = x.copy()

        # Keep the backing array privately; the public attributes are all
        # derived from it.
        self._x = x

    @property
    def bufsize(self) -> int:
        """
        Size of the buffer, in bytes.
        """
        backing = self._x
        return backing.size * backing.dtype.itemsize

    @property
    def ptr(self) -> int:
        """
        Integer address of the first byte of the buffer.
        """
        data_field = self._x.__array_interface__["data"]
        return data_field[0]

    def __dlpack__(self):
        """
        Expose the underlying data through the DLPack interface.

        Raises
        ------
        NotImplementedError
            If the module-level ``_NUMPY_HAS_DLPACK`` flag is False.
        """
        if not _NUMPY_HAS_DLPACK:
            raise NotImplementedError("__dlpack__")
        return self._x.__dlpack__()

    def __dlpack_device__(self) -> Tuple[DlpackDeviceType, Optional[int]]:
        """
        Device type and device ID describing where the buffer's data lives.
        """
        return DlpackDeviceType.CPU, None

    def __repr__(self) -> str:
        device_type = self.__dlpack_device__()[0]
        summary = {
            "bufsize": self.bufsize,
            "ptr": self.ptr,
            "device": device_type.name,
        }
        return f"PandasBuffer({summary})"

0 commit comments

Comments
 (0)