-
-
Notifications
You must be signed in to change notification settings - Fork 146
ENH: Synchronize pickle with upstream #206
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
2c38ee9
55e9136
ad181d8
c94ce6c
d5d59c7
3b9073b
a0c42c9
09edcbc
8c75f7a
3166a55
7a1b4ee
bb42347
99e4203
eda7945
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,22 @@ | ||
from typing import Literal | ||
from typing import Any | ||
|
||
from pandas._typing import FilePathOrBuffer | ||
from pandas._typing import ( | ||
CompressionOptions, | ||
FilePath, | ||
ReadPickleBuffer, | ||
StorageOptions, | ||
WriteBuffer, | ||
) | ||
|
||
def to_pickle( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. still need to handle removal of |
||
obj, | ||
filepath_or_buffer: FilePathOrBuffer, | ||
compression: str | None = ..., | ||
obj: object, | ||
filepath_or_buffer: FilePath | WriteBuffer[bytes], | ||
compression: CompressionOptions = ..., | ||
protocol: int = ..., | ||
): ... | ||
storage_options: StorageOptions = ..., | ||
) -> None: ... | ||
def read_pickle( | ||
filepath_or_buffer_or_reader: FilePathOrBuffer, | ||
compression: str | Literal["infer", "gzip", "bz2", "zip", "xz"] | None = ..., | ||
): ... | ||
filepath_or_buffer: FilePath | ReadPickleBuffer, | ||
compression: CompressionOptions = ..., | ||
storage_options: StorageOptions = ..., | ||
) -> Any: ... |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import os | ||
import tempfile | ||
from typing import Any | ||
|
||
from pandas import DataFrame | ||
from typing_extensions import assert_type | ||
|
||
from tests import check | ||
|
||
from pandas.io.pickle import ( | ||
read_pickle, | ||
to_pickle, | ||
) | ||
|
||
DF = DataFrame({"a": [1, 2, 3], "b": [0.0, 0.0, 0.0]}) | ||
|
||
|
||
def test_pickle(): | ||
with tempfile.NamedTemporaryFile(delete=False) as file: | ||
check(assert_type(DF.to_pickle(file), None), type(None)) | ||
file.seek(0) | ||
check(assert_type(read_pickle(file.name), Any), DataFrame) | ||
file.close() | ||
check(assert_type(read_pickle(file.name), Any), DataFrame) | ||
os.unlink(file.name) | ||
|
||
with tempfile.NamedTemporaryFile(delete=False) as file: | ||
check(assert_type(to_pickle(DF, file), None), type(None)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is listed in pandas/io/api.py so I assume this makes it public even if it is not in the docs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There are a few schools of thought here:
So far, @twoertwein and I have been leaning towards (1). With @twoertwein What are your thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is more of what is the "API". To me it is something like
I think 2 is too broad because there hasn't been enough effort in pandas to In short, if it seems to be part of an API, then it is reasonable to include it. A related point is documenting public methods of classes that appear partially in the docs, something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My goal is that everything that is meant to be public (which is often unclear) is documented and in pandas-stubs. Personally, I think the best way is to remove any symbol from the stubs that is not meant to be public.
I believe typeshed uses There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are some grey zones: a private super class but the inherited methods are public in a public child class: I would keep the parent class (in the long-term, I would like if pandas-stubs aligns with pandas), define There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think that's a fair definition. So using that, then Looking at the source |
||
file.seek(0) | ||
check(assert_type(read_pickle(file.name), Any), DataFrame) | ||
file.close() | ||
check(assert_type(read_pickle(file.name), Any), DataFrame) | ||
os.unlink(file.name) | ||
|
||
|
||
def test_pickle_protocol(): | ||
with tempfile.NamedTemporaryFile(delete=False) as file: | ||
DF.to_pickle(file, protocol=3) | ||
file.seek(0) | ||
check(assert_type(read_pickle(file.name), Any), DataFrame) | ||
file.close() | ||
check(assert_type(read_pickle(file.name), Any), DataFrame) | ||
os.unlink(file.name) | ||
|
||
|
||
def test_pickle_compression(): | ||
with tempfile.NamedTemporaryFile(delete=False) as file: | ||
DF.to_pickle(file, compression="gzip") | ||
file.seek(0) | ||
check( | ||
assert_type(read_pickle(file.name, compression="gzip"), Any), | ||
DataFrame, | ||
) | ||
file.close() | ||
check( | ||
assert_type(read_pickle(file.name, compression="gzip"), Any), | ||
DataFrame, | ||
) | ||
os.unlink(file.name) | ||
|
||
|
||
def test_pickle_storage_options(): | ||
with tempfile.NamedTemporaryFile(delete=False) as file: | ||
DF.to_pickle(file, storage_options={}) | ||
file.seek(0) | ||
check(assert_type(read_pickle(file, storage_options={}), Any), DataFrame) | ||
file.close() | ||
check( | ||
assert_type(read_pickle(file.name, storage_options={}), Any), | ||
DataFrame, | ||
) | ||
os.unlink(file.name) |
Uh oh!
There was an error while loading. Please reload this page.