diff --git a/pandas/tests/arrays/sparse/test_indexing.py b/pandas/tests/arrays/sparse/test_indexing.py
index f639e9b18596c..d63d0fb07b404 100644
--- a/pandas/tests/arrays/sparse/test_indexing.py
+++ b/pandas/tests/arrays/sparse/test_indexing.py
@@ -6,18 +6,25 @@
import pandas._testing as tm
from pandas.core.arrays.sparse import SparseArray
-arr_data = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
-arr = SparseArray(arr_data)
+
+@pytest.fixture
+def arr_data():
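+    # float data with NaNs; NaN is the implied fill value for SparseArray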
+ return np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
+
+
+@pytest.fixture
+def arr(arr_data):
+ return SparseArray(arr_data)
class TestGetitem:
- def test_getitem(self):
+ def test_getitem(self, arr):
dense = arr.to_dense()
for i, value in enumerate(arr):
tm.assert_almost_equal(value, dense[i])
tm.assert_almost_equal(arr[-i], dense[-i])
- def test_getitem_arraylike_mask(self):
+ def test_getitem_arraylike_mask(self, arr):
arr = SparseArray([0, 1, 2])
result = arr[[True, False, True]]
expected = SparseArray([0, 2])
@@ -81,7 +88,7 @@ def test_boolean_slice_empty(self):
res = arr[[False, False, False]]
assert res.dtype == arr.dtype
- def test_getitem_bool_sparse_array(self):
+ def test_getitem_bool_sparse_array(self, arr):
# GH 23122
spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
exp = SparseArray([np.nan, 2, np.nan, 5, 6])
@@ -106,7 +113,7 @@ def test_getitem_bool_sparse_array_as_comparison(self):
exp = SparseArray([3.0, 4.0], fill_value=np.nan)
tm.assert_sp_array_equal(res, exp)
- def test_get_item(self):
+ def test_get_item(self, arr):
zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
assert np.isnan(arr[1])
@@ -129,7 +136,7 @@ def test_get_item(self):
class TestSetitem:
- def test_set_item(self):
+ def test_set_item(self, arr_data):
arr = SparseArray(arr_data).copy()
def setitem():
@@ -146,12 +153,12 @@ def setslice():
class TestTake:
- def test_take_scalar_raises(self):
+ def test_take_scalar_raises(self, arr):
msg = "'indices' must be an array, not a scalar '2'."
with pytest.raises(ValueError, match=msg):
arr.take(2)
- def test_take(self):
+ def test_take(self, arr_data, arr):
exp = SparseArray(np.take(arr_data, [2, 3]))
tm.assert_sp_array_equal(arr.take([2, 3]), exp)
@@ -173,14 +180,14 @@ def test_take_fill_value(self):
exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0)
tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp)
- def test_take_negative(self):
+ def test_take_negative(self, arr_data, arr):
exp = SparseArray(np.take(arr_data, [-1]))
tm.assert_sp_array_equal(arr.take([-1]), exp)
exp = SparseArray(np.take(arr_data, [-4, -3, -2]))
tm.assert_sp_array_equal(arr.take([-4, -3, -2]), exp)
- def test_bad_take(self):
+ def test_bad_take(self, arr):
with pytest.raises(IndexError, match="bounds"):
arr.take([11])
diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py
index b7517b1b16445..7a77a2064e7e0 100644
--- a/pandas/tests/arrays/sparse/test_libsparse.py
+++ b/pandas/tests/arrays/sparse/test_libsparse.py
@@ -14,77 +14,74 @@
make_sparse_index,
)
-TEST_LENGTH = 20
-
-plain_case = [
- [0, 7, 15],
- [3, 5, 5],
- [2, 9, 14],
- [2, 3, 5],
- [2, 9, 15],
- [1, 3, 4],
-]
-delete_blocks = [
- [0, 5],
- [4, 4],
- [1],
- [4],
- [1],
- [3],
-]
-split_blocks = [
- [0],
- [10],
- [0, 5],
- [3, 7],
- [0, 5],
- [3, 5],
-]
-skip_block = [
- [10],
- [5],
- [0, 12],
- [5, 3],
- [12],
- [3],
-]
-
-no_intersect = [
- [0, 10],
- [4, 6],
- [5, 17],
- [4, 2],
- [],
- [],
-]
-
-one_empty = [
- [0],
- [5],
- [],
- [],
- [],
- [],
-]
-
-both_empty = [ # type: ignore[var-annotated]
- [],
- [],
- [],
- [],
- [],
- [],
-]
-
-CASES = [plain_case, delete_blocks, split_blocks, skip_block, no_intersect, one_empty]
-IDS = [
- "plain_case",
- "delete_blocks",
- "split_blocks",
- "skip_block",
- "no_intersect",
- "one_empty",
-]
+
+@pytest.fixture
+def test_length():
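+    # total length of each BlockIndex constructed in these tests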
+ return 20
+
+
+@pytest.fixture(
+ params=[
+ [
+ [0, 7, 15],
+ [3, 5, 5],
+ [2, 9, 14],
+ [2, 3, 5],
+ [2, 9, 15],
+ [1, 3, 4],
+ ],
+ [
+ [0, 5],
+ [4, 4],
+ [1],
+ [4],
+ [1],
+ [3],
+ ],
+ [
+ [0],
+ [10],
+ [0, 5],
+ [3, 7],
+ [0, 5],
+ [3, 5],
+ ],
+ [
+ [10],
+ [5],
+ [0, 12],
+ [5, 3],
+ [12],
+ [3],
+ ],
+ [
+ [0, 10],
+ [4, 6],
+ [5, 17],
+ [4, 2],
+ [],
+ [],
+ ],
+ [
+ [0],
+ [5],
+ [],
+ [],
+ [],
+ [],
+ ],
+ ],
+ ids=[
+ "plain_case",
+ "delete_blocks",
+ "split_blocks",
+ "skip_block",
+ "no_intersect",
+ "one_empty",
+ ],
+)
+def cases(request):
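+    # Each case is [xloc, xlen, yloc, ylen, eloc, elen]: block locations and
+    # lengths for the two input indexes and the expected intersection result.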
+ return request.param
class TestSparseIndexUnion:
@@ -101,7 +98,7 @@ class TestSparseIndexUnion:
[[0, 10], [3, 3], [5, 15], [2, 2], [0, 5, 10, 15], [3, 2, 3, 2]],
],
)
- def test_index_make_union(self, xloc, xlen, yloc, ylen, eloc, elen):
+ def test_index_make_union(self, xloc, xlen, yloc, ylen, eloc, elen, test_length):
# Case 1
# x: ----
# y: ----
@@ -132,8 +129,8 @@ def test_index_make_union(self, xloc, xlen, yloc, ylen, eloc, elen):
# Case 8
# x: ---- ---
# y: --- ---
- xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
- yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
+ xindex = BlockIndex(test_length, xloc, xlen)
+ yindex = BlockIndex(test_length, yloc, ylen)
bresult = xindex.make_union(yindex)
assert isinstance(bresult, BlockIndex)
tm.assert_numpy_array_equal(bresult.blocs, np.array(eloc, dtype=np.int32))
@@ -180,12 +177,12 @@ def test_int_index_make_union(self):
class TestSparseIndexIntersect:
@td.skip_if_windows
- @pytest.mark.parametrize("xloc, xlen, yloc, ylen, eloc, elen", CASES, ids=IDS)
- def test_intersect(self, xloc, xlen, yloc, ylen, eloc, elen):
- xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
- yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
- expected = BlockIndex(TEST_LENGTH, eloc, elen)
- longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen)
+ def test_intersect(self, cases, test_length):
+ xloc, xlen, yloc, ylen, eloc, elen = cases
+ xindex = BlockIndex(test_length, xloc, xlen)
+ yindex = BlockIndex(test_length, yloc, ylen)
+ expected = BlockIndex(test_length, eloc, elen)
+ longer_index = BlockIndex(test_length + 1, yloc, ylen)
result = xindex.intersect(yindex)
assert result.equals(expected)
@@ -493,10 +490,10 @@ def test_equals(self):
assert index.equals(index)
assert not index.equals(IntIndex(10, [0, 1, 2, 3]))
- @pytest.mark.parametrize("xloc, xlen, yloc, ylen, eloc, elen", CASES, ids=IDS)
- def test_to_block_index(self, xloc, xlen, yloc, ylen, eloc, elen):
- xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
- yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
+ def test_to_block_index(self, cases, test_length):
+ xloc, xlen, yloc, ylen, _, _ = cases
+ xindex = BlockIndex(test_length, xloc, xlen)
+ yindex = BlockIndex(test_length, yloc, ylen)
# see if we survive the round trip
xbindex = xindex.to_int_index().to_block_index()
@@ -512,13 +509,13 @@ def test_to_int_index(self):
class TestSparseOperators:
@pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"])
- @pytest.mark.parametrize("xloc, xlen, yloc, ylen, eloc, elen", CASES, ids=IDS)
- def test_op(self, opname, xloc, xlen, yloc, ylen, eloc, elen):
+ def test_op(self, opname, cases, test_length):
+ xloc, xlen, yloc, ylen, _, _ = cases
sparse_op = getattr(splib, f"sparse_{opname}_float64")
python_op = getattr(operator, opname)
- xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
- yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
+ xindex = BlockIndex(test_length, xloc, xlen)
+ yindex = BlockIndex(test_length, yloc, ylen)
xdindex = xindex.to_int_index()
ydindex = yindex.to_int_index()
@@ -542,10 +539,10 @@ def test_op(self, opname, xloc, xlen, yloc, ylen, eloc, elen):
# check versus Series...
xseries = Series(x, xdindex.indices)
- xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill)
+ xseries = xseries.reindex(np.arange(test_length)).fillna(xfill)
yseries = Series(y, ydindex.indices)
- yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill)
+ yseries = yseries.reindex(np.arange(test_length)).fillna(yfill)
series_result = python_op(xseries, yseries)
series_result = series_result.reindex(ri_index.indices)
diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py
index 36b7dcfe4db12..de36d52921622 100644
--- a/pandas/tests/indexing/multiindex/test_indexing_slow.py
+++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py
@@ -1,8 +1,3 @@
-from typing import (
- Any,
- List,
-)
-
import numpy as np
import pytest
@@ -13,78 +8,72 @@
)
import pandas._testing as tm
-m = 50
-n = 1000
-cols = ["jim", "joe", "jolie", "joline", "jolia"]
-
-vals: List[Any] = [
- np.random.randint(0, 10, n),
- np.random.choice(list("abcdefghij"), n),
- np.random.choice(pd.date_range("20141009", periods=10).tolist(), n),
- np.random.choice(list("ZYXWVUTSRQ"), n),
- np.random.randn(n),
-]
-vals = list(map(tuple, zip(*vals)))
-
-# bunch of keys for testing
-keys: List[Any] = [
- np.random.randint(0, 11, m),
- np.random.choice(list("abcdefghijk"), m),
- np.random.choice(pd.date_range("20141009", periods=11).tolist(), m),
- np.random.choice(list("ZYXWVUTSRQP"), m),
-]
-keys = list(map(tuple, zip(*keys)))
-keys += [t[:-1] for t in vals[:: n // m]]
+
+@pytest.fixture
+def m():
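+    # number of random keys to generate for the lookup checks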
+ return 50
+
+
+@pytest.fixture
+def n():
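+    # number of rows in the values/DataFrame fixtures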
+ return 1000
+
+
+@pytest.fixture
+def cols():
+ return ["jim", "joe", "jolie", "joline", "jolia"]
+
+
+@pytest.fixture
+def vals(n):
+ vals = [
+ np.random.randint(0, 10, n),
+ np.random.choice(list("abcdefghij"), n),
+ np.random.choice(pd.date_range("20141009", periods=10).tolist(), n),
+ np.random.choice(list("ZYXWVUTSRQ"), n),
+ np.random.randn(n),
+ ]
+ vals = list(map(tuple, zip(*vals)))
+ return vals
+
+
+@pytest.fixture
+def keys(n, m, vals):
+ # bunch of keys for testing
+ keys = [
+ np.random.randint(0, 11, m),
+ np.random.choice(list("abcdefghijk"), m),
+ np.random.choice(pd.date_range("20141009", periods=11).tolist(), m),
+ np.random.choice(list("ZYXWVUTSRQP"), m),
+ ]
+ keys = list(map(tuple, zip(*keys)))
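+    # also add partial keys taken from actual rows so some lookups are guaranteed hits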
+ keys += [t[:-1] for t in vals[:: n // m]]
+ return keys
# covers both unique index and non-unique index
-df = DataFrame(vals, columns=cols)
-a = pd.concat([df, df])
-b = df.drop_duplicates(subset=cols[:-1])
-
-
-def validate(mi, df, key):
- # check indexing into a multi-index before & past the lexsort depth
-
- mask = np.ones(len(df), dtype=bool)
-
- # test for all partials of this key
- for i, k in enumerate(key):
- mask &= df.iloc[:, i] == k
-
- if not mask.any():
- assert key[: i + 1] not in mi.index
- continue
-
- assert key[: i + 1] in mi.index
- right = df[mask].copy(deep=False)
-
- if i + 1 != len(key): # partial key
- return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
- assert return_value is None
- return_value = right.set_index(cols[i + 1 : -1], inplace=True)
- assert return_value is None
- tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
-
- else: # full key
- return_value = right.set_index(cols[:-1], inplace=True)
- assert return_value is None
- if len(right) == 1: # single hit
- right = Series(
- right["jolia"].values, name=right.index[0], index=["jolia"]
- )
- tm.assert_series_equal(mi.loc[key[: i + 1]], right)
- else: # multi hit
- tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
+@pytest.fixture
+def df(vals, cols):
+ return DataFrame(vals, columns=cols)
+
+
+@pytest.fixture
+def a(df):
+ return pd.concat([df, df])
+
+
+@pytest.fixture
+def b(df, cols):
+ return df.drop_duplicates(subset=cols[:-1])
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
-@pytest.mark.parametrize("key", keys)
-@pytest.mark.parametrize("frame", [a, b])
-def test_multiindex_get_loc(lexsort_depth, key, frame):
+@pytest.mark.parametrize("frame_fixture", ["a", "b"])
+def test_multiindex_get_loc(request, lexsort_depth, keys, frame_fixture, cols):
# GH7724, GH2646
+ frame = request.getfixturevalue(frame_fixture)
if lexsort_depth == 0:
df = frame.copy(deep=False)
else:
@@ -92,4 +81,34 @@ def test_multiindex_get_loc(lexsort_depth, key, frame):
mi = df.set_index(cols[:-1])
assert not mi.index._lexsort_depth < lexsort_depth
- validate(mi, df, key)
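+    # check indexing into a multi-index before & past the lexsort depth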
+ for key in keys:
+ mask = np.ones(len(df), dtype=bool)
+
+ # test for all partials of this key
+ for i, k in enumerate(key):
+ mask &= df.iloc[:, i] == k
+
+ if not mask.any():
+ assert key[: i + 1] not in mi.index
+ continue
+
+ assert key[: i + 1] in mi.index
+ right = df[mask].copy(deep=False)
+
+ if i + 1 != len(key): # partial key
+ return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
+ assert return_value is None
+ return_value = right.set_index(cols[i + 1 : -1], inplace=True)
+ assert return_value is None
+ tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
+
+ else: # full key
+ return_value = right.set_index(cols[:-1], inplace=True)
+ assert return_value is None
+ if len(right) == 1: # single hit
+ right = Series(
+ right["jolia"].values, name=right.index[0], index=["jolia"]
+ )
+ tm.assert_series_equal(mi.loc[key[: i + 1]], right)
+ else: # multi hit
+ tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
index b863e85cae457..68365c125a951 100644
--- a/pandas/tests/io/conftest.py
+++ b/pandas/tests/io/conftest.py
@@ -15,9 +15,15 @@
import pandas._testing as tm
+import pandas.io.common as icom
from pandas.io.parsers import read_csv
+@pytest.fixture
+def compression_to_extension():
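+    # invert pandas.io.common.extension_to_compression: compression method -> extension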
+ return {value: key for key, value in icom.extension_to_compression.items()}
+
+
@pytest.fixture
def tips_file(datapath):
"""Path to the tips dataset"""
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index a208daaf9f77b..32509a799fa69 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -13,7 +13,6 @@
compat,
)
import pandas._testing as tm
-from pandas.tests.io.test_compression import _compression_to_extension
class TestToCSV:
@@ -543,13 +542,15 @@ def test_to_csv_write_to_open_file_with_newline_py3(self):
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
- def test_to_csv_compression(self, compression_only, read_infer, to_infer):
+ def test_to_csv_compression(
+ self, compression_only, read_infer, to_infer, compression_to_extension
+ ):
# see gh-15008
compression = compression_only
# We'll complete the file extension subsequently.
filename = "test."
- filename += _compression_to_extension[compression]
+ filename += compression_to_extension[compression]
df = DataFrame({"A": [1]})
diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py
index 143d2431d4147..4a7606eaf05d7 100644
--- a/pandas/tests/io/json/test_compression.py
+++ b/pandas/tests/io/json/test_compression.py
@@ -6,7 +6,6 @@
import pandas as pd
import pandas._testing as tm
-from pandas.tests.io.test_compression import _compression_to_extension
def test_compression_roundtrip(compression):
@@ -91,13 +90,15 @@ def test_read_unsupported_compression_type():
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
-def test_to_json_compression(compression_only, read_infer, to_infer):
+def test_to_json_compression(
+ compression_only, read_infer, to_infer, compression_to_extension
+):
# see gh-15008
compression = compression_only
# We'll complete the file extension subsequently.
filename = "test."
- filename += _compression_to_extension[compression]
+ filename += compression_to_extension[compression]
df = pd.DataFrame({"A": [1]})
diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py
index bcba9c4a1823d..d150b52258d47 100644
--- a/pandas/tests/io/parser/test_compression.py
+++ b/pandas/tests/io/parser/test_compression.py
@@ -12,7 +12,6 @@
from pandas import DataFrame
import pandas._testing as tm
-from pandas.tests.io.test_compression import _compression_to_extension
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@@ -91,11 +90,18 @@ def test_zip_error_invalid_zip(parser_and_data):
@skip_pyarrow
@pytest.mark.parametrize("filename", [None, "test.{ext}"])
-def test_compression(request, parser_and_data, compression_only, buffer, filename):
+def test_compression(
+ request,
+ parser_and_data,
+ compression_only,
+ buffer,
+ filename,
+ compression_to_extension,
+):
parser, data, expected = parser_and_data
compress_type = compression_only
- ext = _compression_to_extension[compress_type]
+ ext = compression_to_extension[compress_type]
filename = filename if filename is None else filename.format(ext=ext)
if filename and buffer:
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index a0d9c6ae99dcf..f3ae5b54d09ce 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -16,7 +16,6 @@
from pandas import DataFrame
import pandas._testing as tm
-from pandas.tests.io.test_compression import _compression_to_extension
from pandas.io.feather_format import read_feather
from pandas.io.parsers import read_csv
@@ -32,10 +31,12 @@
)
@pytest.mark.parametrize("mode", ["explicit", "infer"])
@pytest.mark.parametrize("engine", ["python", "c"])
-def test_compressed_urls(salaries_table, mode, engine, compression_only):
+def test_compressed_urls(
+ salaries_table, mode, engine, compression_only, compression_to_extension
+):
# test reading compressed urls with various engines and
# extension inference
- extension = _compression_to_extension[compression_only]
+ extension = compression_to_extension[compression_only]
base_url = (
"https://github.com/pandas-dev/pandas/raw/main/"
"pandas/tests/io/parser/data/salaries.csv"
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index 030650ad0031d..c682963c462cc 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -26,7 +26,6 @@
ArrowStringArray,
StringArray,
)
-from pandas.tests.io.test_compression import _compression_to_extension
from pandas.io.common import urlopen
from pandas.io.parsers import (
@@ -667,13 +666,13 @@ def test_default_delimiter():
@pytest.mark.parametrize("infer", [True, False])
-def test_fwf_compression(compression_only, infer):
+def test_fwf_compression(compression_only, infer, compression_to_extension):
data = """1111111111
2222222222
3333333333""".strip()
compression = compression_only
- extension = _compression_to_extension[compression]
+ extension = compression_to_extension[compression]
kwargs = {"widths": [5, 5], "names": ["one", "two"]}
expected = read_fwf(StringIO(data), **kwargs)
diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
index ac11e2165eb6f..c84670f0eb69c 100644
--- a/pandas/tests/io/test_compression.py
+++ b/pandas/tests/io/test_compression.py
@@ -18,10 +18,6 @@
import pandas.io.common as icom
-_compression_to_extension = {
- value: key for key, value in icom.extension_to_compression.items()
-}
-
@pytest.mark.parametrize(
"obj",
@@ -84,11 +80,11 @@ def test_compression_size_fh(obj, method, compression_only):
],
)
def test_dataframe_compression_defaults_to_infer(
- write_method, write_kwargs, read_method, compression_only
+ write_method, write_kwargs, read_method, compression_only, compression_to_extension
):
# GH22004
input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=["X", "Y", "Z"])
- extension = _compression_to_extension[compression_only]
+ extension = compression_to_extension[compression_only]
with tm.ensure_clean("compressed" + extension) as path:
getattr(input, write_method)(path, **write_kwargs)
output = read_method(path, compression=compression_only)
@@ -104,11 +100,16 @@ def test_dataframe_compression_defaults_to_infer(
],
)
def test_series_compression_defaults_to_infer(
- write_method, write_kwargs, read_method, read_kwargs, compression_only
+ write_method,
+ write_kwargs,
+ read_method,
+ read_kwargs,
+ compression_only,
+ compression_to_extension,
):
# GH22004
input = pd.Series([0, 5, -2, 10], name="X")
- extension = _compression_to_extension[compression_only]
+ extension = compression_to_extension[compression_only]
with tm.ensure_clean("compressed" + extension) as path:
getattr(input, write_method)(path, **write_kwargs)
if "squeeze" in read_kwargs:
diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py
index 01e1be5529bad..7b139dc45624e 100644
--- a/pandas/tests/io/test_feather.py
+++ b/pandas/tests/io/test_feather.py
@@ -11,7 +11,7 @@
from pandas.io.feather_format import read_feather, to_feather # isort:skip
-pyarrow = pytest.importorskip("pyarrow", minversion="1.0.1")
+pyarrow = pytest.importorskip("pyarrow")
@pytest.mark.single_cpu
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index 18cc0f0b11dc9..d82cfd5bd169d 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -16,7 +16,6 @@
read_parquet,
)
import pandas._testing as tm
-from pandas.tests.io.test_compression import _compression_to_extension
from pandas.util import _test_decorators as td
@@ -132,7 +131,9 @@ def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str):
@td.skip_if_no("gcsfs")
@pytest.mark.parametrize("encoding", ["utf-8", "cp1251"])
-def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding):
+def test_to_csv_compression_encoding_gcs(
+ gcs_buffer, compression_only, encoding, compression_to_extension
+):
"""
Compression and encoding should work with GCS.
@@ -161,7 +162,7 @@ def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding)
tm.assert_frame_equal(df, read_df)
# write compressed file with implicit compression
- file_ext = _compression_to_extension[compression_only]
+ file_ext = compression_to_extension[compression_only]
compression["method"] = "infer"
path_gcs += f".{file_ext}"
df.to_csv(path_gcs, compression=compression, encoding=encoding)
diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py
index 36cfe5576adf9..571d9d5536e20 100644
--- a/pandas/tests/io/test_orc.py
+++ b/pandas/tests/io/test_orc.py
@@ -25,25 +25,19 @@ def dirpath(datapath):
return datapath("io", "data", "orc")
-# Examples of dataframes with dtypes for which conversion to ORC
-# hasn't been implemented yet, that is, Category, unsigned integers,
-# interval, period and sparse.
-orc_writer_dtypes_not_supported = [
- pd.DataFrame({"unimpl": np.array([1, 20], dtype="uint64")}),
- pd.DataFrame({"unimpl": pd.Series(["a", "b", "a"], dtype="category")}),
- pd.DataFrame(
- {"unimpl": [pd.Interval(left=0, right=2), pd.Interval(left=0, right=5)]}
- ),
- pd.DataFrame(
- {
- "unimpl": [
- pd.Period("2022-01-03", freq="D"),
- pd.Period("2022-01-04", freq="D"),
- ]
- }
- ),
- pd.DataFrame({"unimpl": [np.nan] * 50}).astype(pd.SparseDtype("float", np.nan)),
-]
+@pytest.fixture(
+ params=[
+ np.array([1, 20], dtype="uint64"),
+ pd.Series(["a", "b", "a"], dtype="category"),
+ [pd.Interval(left=0, right=2), pd.Interval(left=0, right=5)],
+ [pd.Period("2022-01-03", freq="D"), pd.Period("2022-01-04", freq="D")],
+ ]
+)
+def orc_writer_dtypes_not_supported(request):
+ # Examples of dataframes with dtypes for which conversion to ORC
+ # hasn't been implemented yet, that is, Category, unsigned integers,
+ # interval, period and sparse.
+ return pd.DataFrame({"unimpl": request.param})
def test_orc_reader_empty(dirpath):
@@ -297,13 +291,12 @@ def test_orc_roundtrip_bytesio():
@td.skip_if_no("pyarrow", min_version="7.0.0")
-@pytest.mark.parametrize("df_not_supported", orc_writer_dtypes_not_supported)
-def test_orc_writer_dtypes_not_supported(df_not_supported):
+def test_orc_writer_dtypes_not_supported(orc_writer_dtypes_not_supported):
# GH44554
# PyArrow gained ORC write support with the current argument order
msg = "The dtype of one or more columns is not supported yet."
with pytest.raises(NotImplementedError, match=msg):
- df_not_supported.to_orc()
+ orc_writer_dtypes_not_supported.to_orc()
@td.skip_if_no("pyarrow", min_version="7.0.0")
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 1b0a1d740677b..68f9b2b64b92a 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -19,7 +19,6 @@
DataFrame,
Series,
)
-from pandas.tests.io.test_compression import _compression_to_extension
from pandas.io.parsers import read_csv
from pandas.io.stata import (
@@ -1964,13 +1963,13 @@ def test_statareader_warns_when_used_without_context(datapath):
@pytest.mark.parametrize("version", [114, 117, 118, 119, None])
@pytest.mark.parametrize("use_dict", [True, False])
@pytest.mark.parametrize("infer", [True, False])
-def test_compression(compression, version, use_dict, infer):
+def test_compression(compression, version, use_dict, infer, compression_to_extension):
file_name = "dta_inferred_compression.dta"
if compression:
if use_dict:
file_ext = compression
else:
- file_ext = _compression_to_extension[compression]
+ file_ext = compression_to_extension[compression]
file_name += f".{file_ext}"
compression_arg = compression
if infer:
@@ -2134,10 +2133,12 @@ def test_compression_roundtrip(compression):
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
-def test_stata_compression(compression_only, read_infer, to_infer):
+def test_stata_compression(
+ compression_only, read_infer, to_infer, compression_to_extension
+):
compression = compression_only
- ext = _compression_to_extension[compression]
+ ext = compression_to_extension[compression]
filename = f"test.{ext}"
df = DataFrame(
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index 1f1f44f408fc1..04194a68ed512 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -17,7 +17,6 @@
Index,
)
import pandas._testing as tm
-from pandas.tests.io.test_compression import _compression_to_extension
from pandas.io.common import get_handle
from pandas.io.xml import read_xml
@@ -56,60 +55,69 @@
# [X] - XSLTParseError: "failed to compile"
# [X] - PermissionError: "Forbidden"
-geom_df = DataFrame(
- {
- "shape": ["square", "circle", "triangle"],
- "degrees": [360, 360, 180],
- "sides": [4, np.nan, 3],
- }
-)
-planet_df = DataFrame(
- {
- "planet": [
- "Mercury",
- "Venus",
- "Earth",
- "Mars",
- "Jupiter",
- "Saturn",
- "Uranus",
- "Neptune",
- ],
- "type": [
- "terrestrial",
- "terrestrial",
- "terrestrial",
- "terrestrial",
- "gas giant",
- "gas giant",
- "ice giant",
- "ice giant",
- ],
- "location": [
- "inner",
- "inner",
- "inner",
- "inner",
- "outer",
- "outer",
- "outer",
- "outer",
- ],
- "mass": [
- 0.330114,
- 4.86747,
- 5.97237,
- 0.641712,
- 1898.187,
- 568.3174,
- 86.8127,
- 102.4126,
- ],
- }
-)
+@pytest.fixture
+def geom_df():
+ return DataFrame(
+ {
+ "shape": ["square", "circle", "triangle"],
+ "degrees": [360, 360, 180],
+ "sides": [4, np.nan, 3],
+ }
+ )
+
+
+@pytest.fixture
+def planet_df():
+ return DataFrame(
+ {
+ "planet": [
+ "Mercury",
+ "Venus",
+ "Earth",
+ "Mars",
+ "Jupiter",
+ "Saturn",
+ "Uranus",
+ "Neptune",
+ ],
+ "type": [
+ "terrestrial",
+ "terrestrial",
+ "terrestrial",
+ "terrestrial",
+ "gas giant",
+ "gas giant",
+ "ice giant",
+ "ice giant",
+ ],
+ "location": [
+ "inner",
+ "inner",
+ "inner",
+ "inner",
+ "outer",
+ "outer",
+ "outer",
+ "outer",
+ ],
+ "mass": [
+ 0.330114,
+ 4.86747,
+ 5.97237,
+ 0.641712,
+ 1898.187,
+ 568.3174,
+ 86.8127,
+ 102.4126,
+ ],
+ }
+ )
+
-from_file_expected = """\
+@pytest.fixture
+def from_file_expected():
+ return """\
@@ -163,7 +171,7 @@ def parser(request):
# FILE OUTPUT
-def test_file_output_str_read(datapath, parser):
+def test_file_output_str_read(datapath, parser, from_file_expected):
filename = datapath("io", "data", "xml", "books.xml")
df_file = read_xml(filename, parser=parser)
@@ -177,7 +185,7 @@ def test_file_output_str_read(datapath, parser):
assert output == from_file_expected
-def test_file_output_bytes_read(datapath, parser):
+def test_file_output_bytes_read(datapath, parser, from_file_expected):
filename = datapath("io", "data", "xml", "books.xml")
df_file = read_xml(filename, parser=parser)
@@ -191,7 +199,7 @@ def test_file_output_bytes_read(datapath, parser):
assert output == from_file_expected
-def test_str_output(datapath, parser):
+def test_str_output(datapath, parser, from_file_expected):
filename = datapath("io", "data", "xml", "books.xml")
df_file = read_xml(filename, parser=parser)
@@ -201,7 +209,7 @@ def test_str_output(datapath, parser):
assert output == from_file_expected
-def test_wrong_file_path(parser):
+def test_wrong_file_path(parser, geom_df):
path = "/my/fake/path/output.xml"
with pytest.raises(
@@ -299,7 +307,7 @@ def test_index_false_rename_row_root(datapath, parser):
@pytest.mark.parametrize(
"offset_index", [list(range(10, 13)), [str(i) for i in range(10, 13)]]
)
-def test_index_false_with_offset_input_index(parser, offset_index):
+def test_index_false_with_offset_input_index(parser, offset_index, geom_df):
"""
Tests that the output does not contain the `<index>` field when the index of the
input DataFrame has an offset.
@@ -361,21 +369,21 @@ def test_index_false_with_offset_input_index(parser, offset_index):
"""
-def test_na_elem_output(parser):
+def test_na_elem_output(parser, geom_df):
output = geom_df.to_xml(parser=parser)
output = equalize_decl(output)
assert output == na_expected
-def test_na_empty_str_elem_option(parser):
+def test_na_empty_str_elem_option(parser, geom_df):
output = geom_df.to_xml(na_rep="", parser=parser)
output = equalize_decl(output)
assert output == na_expected
-def test_na_empty_elem_option(parser):
+def test_na_empty_elem_option(parser, geom_df):
expected = """\
@@ -408,7 +416,7 @@ def test_na_empty_elem_option(parser):
# ATTR_COLS
-def test_attrs_cols_nan_output(parser):
+def test_attrs_cols_nan_output(parser, geom_df):
expected = """\
@@ -423,7 +431,7 @@ def test_attrs_cols_nan_output(parser):
assert output == expected
-def test_attrs_cols_prefix(parser):
+def test_attrs_cols_prefix(parser, geom_df):
expected = """\
@@ -446,12 +454,12 @@ def test_attrs_cols_prefix(parser):
assert output == expected
-def test_attrs_unknown_column(parser):
+def test_attrs_unknown_column(parser, geom_df):
with pytest.raises(KeyError, match=("no valid column")):
geom_df.to_xml(attr_cols=["shape", "degree", "sides"], parser=parser)
-def test_attrs_wrong_type(parser):
+def test_attrs_wrong_type(parser, geom_df):
with pytest.raises(TypeError, match=("is not a valid type for attr_cols")):
geom_df.to_xml(attr_cols='"shape", "degree", "sides"', parser=parser)
@@ -459,7 +467,7 @@ def test_attrs_wrong_type(parser):
# ELEM_COLS
-def test_elems_cols_nan_output(parser):
+def test_elems_cols_nan_output(parser, geom_df):
elems_cols_expected = """\
@@ -488,17 +496,17 @@ def test_elems_cols_nan_output(parser):
assert output == elems_cols_expected
-def test_elems_unknown_column(parser):
+def test_elems_unknown_column(parser, geom_df):
with pytest.raises(KeyError, match=("no valid column")):
geom_df.to_xml(elem_cols=["shape", "degree", "sides"], parser=parser)
-def test_elems_wrong_type(parser):
+def test_elems_wrong_type(parser, geom_df):
with pytest.raises(TypeError, match=("is not a valid type for elem_cols")):
geom_df.to_xml(elem_cols='"shape", "degree", "sides"', parser=parser)
-def test_elems_and_attrs_cols(parser):
+def test_elems_and_attrs_cols(parser, geom_df):
elems_cols_expected = """\
@@ -530,7 +538,7 @@ def test_elems_and_attrs_cols(parser):
# HIERARCHICAL COLUMNS
-def test_hierarchical_columns(parser):
+def test_hierarchical_columns(parser, planet_df):
expected = """\
@@ -577,7 +585,7 @@ def test_hierarchical_columns(parser):
assert output == expected
-def test_hierarchical_attrs_columns(parser):
+def test_hierarchical_attrs_columns(parser, planet_df):
expected = """\
@@ -607,7 +615,7 @@ def test_hierarchical_attrs_columns(parser):
# MULTIINDEX
-def test_multi_index(parser):
+def test_multi_index(parser, planet_df):
expected = """\
@@ -646,7 +654,7 @@ def test_multi_index(parser):
assert output == expected
-def test_multi_index_attrs_cols(parser):
+def test_multi_index_attrs_cols(parser, planet_df):
expected = """\
@@ -672,7 +680,7 @@ def test_multi_index_attrs_cols(parser):
# NAMESPACE
-def test_default_namespace(parser):
+def test_default_namespace(parser, geom_df):
expected = """\
@@ -705,7 +713,7 @@ def test_default_namespace(parser):
# PREFIX
-def test_namespace_prefix(parser):
+def test_namespace_prefix(parser, geom_df):
expected = """\
@@ -737,14 +745,14 @@ def test_namespace_prefix(parser):
assert output == expected
-def test_missing_prefix_in_nmsp(parser):
+def test_missing_prefix_in_nmsp(parser, geom_df):
with pytest.raises(KeyError, match=("doc is not included in namespaces")):
geom_df.to_xml(
namespaces={"": "http://example.com"}, prefix="doc", parser=parser
)
-def test_namespace_prefix_and_default(parser):
+def test_namespace_prefix_and_default(parser, geom_df):
expected = """\
@@ -858,7 +866,7 @@ def test_wrong_encoding_option_lxml(datapath, parser, encoding):
df_file.to_xml(path, index=False, encoding=encoding, parser=parser)
-def test_misspelled_encoding(parser):
+def test_misspelled_encoding(parser, geom_df):
with pytest.raises(LookupError, match=("unknown encoding")):
geom_df.to_xml(encoding="uft-8", parser=parser)
@@ -867,7 +875,7 @@ def test_misspelled_encoding(parser):
@td.skip_if_no("lxml")
-def test_xml_declaration_pretty_print():
+def test_xml_declaration_pretty_print(geom_df):
expected = """\
@@ -895,7 +903,7 @@ def test_xml_declaration_pretty_print():
assert output == expected
-def test_no_pretty_print_with_decl(parser):
+def test_no_pretty_print_with_decl(parser, geom_df):
expected = (
"\n"
"0square"
@@ -916,7 +924,7 @@ def test_no_pretty_print_with_decl(parser):
assert output == expected
-def test_no_pretty_print_no_decl(parser):
+def test_no_pretty_print_no_decl(parser, geom_df):
expected = (
"0square"
"3604.0
"
@@ -939,14 +947,14 @@ def test_no_pretty_print_no_decl(parser):
@td.skip_if_installed("lxml")
-def test_default_parser_no_lxml():
+def test_default_parser_no_lxml(geom_df):
with pytest.raises(
ImportError, match=("lxml not found, please install or use the etree parser.")
):
geom_df.to_xml()
-def test_unknown_parser():
+def test_unknown_parser(geom_df):
with pytest.raises(
ValueError, match=("Values for parser can only be lxml or etree.")
):
@@ -980,7 +988,7 @@ def test_unknown_parser():
@td.skip_if_no("lxml")
-def test_stylesheet_file_like(datapath, mode):
+def test_stylesheet_file_like(datapath, mode, geom_df):
xsl = datapath("io", "data", "xml", "row_field_output.xsl")
with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
@@ -988,7 +996,7 @@ def test_stylesheet_file_like(datapath, mode):
@td.skip_if_no("lxml")
-def test_stylesheet_io(datapath, mode):
+def test_stylesheet_io(datapath, mode, geom_df):
xsl_path = datapath("io", "data", "xml", "row_field_output.xsl")
# note: By default the bodies of untyped functions are not checked,
@@ -1007,7 +1015,7 @@ def test_stylesheet_io(datapath, mode):
@td.skip_if_no("lxml")
-def test_stylesheet_buffered_reader(datapath, mode):
+def test_stylesheet_buffered_reader(datapath, mode, geom_df):
xsl = datapath("io", "data", "xml", "row_field_output.xsl")
with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
@@ -1019,7 +1027,7 @@ def test_stylesheet_buffered_reader(datapath, mode):
@td.skip_if_no("lxml")
-def test_stylesheet_wrong_path():
+def test_stylesheet_wrong_path(geom_df):
from lxml.etree import XMLSyntaxError
xsl = os.path.join("data", "xml", "row_field_output.xslt")
@@ -1033,7 +1041,7 @@ def test_stylesheet_wrong_path():
@td.skip_if_no("lxml")
@pytest.mark.parametrize("val", ["", b""])
-def test_empty_string_stylesheet(val):
+def test_empty_string_stylesheet(val, geom_df):
from lxml.etree import XMLSyntaxError
msg = "|".join(
@@ -1050,7 +1058,7 @@ def test_empty_string_stylesheet(val):
@td.skip_if_no("lxml")
-def test_incorrect_xsl_syntax():
+def test_incorrect_xsl_syntax(geom_df):
from lxml.etree import XMLSyntaxError
xsl = """\
@@ -1079,7 +1087,7 @@ def test_incorrect_xsl_syntax():
@td.skip_if_no("lxml")
-def test_incorrect_xsl_eval():
+def test_incorrect_xsl_eval(geom_df):
from lxml.etree import XSLTParseError
xsl = """\
@@ -1108,7 +1116,7 @@ def test_incorrect_xsl_eval():
@td.skip_if_no("lxml")
-def test_incorrect_xsl_apply():
+def test_incorrect_xsl_apply(geom_df):
from lxml.etree import XSLTApplyError
xsl = """\
@@ -1128,7 +1136,7 @@ def test_incorrect_xsl_apply():
geom_df.to_xml(path, stylesheet=xsl)
-def test_stylesheet_with_etree():
+def test_stylesheet_with_etree(geom_df):
xsl = """\
@@ -1147,7 +1155,7 @@ def test_stylesheet_with_etree():
@td.skip_if_no("lxml")
-def test_style_to_csv():
+def test_style_to_csv(geom_df):
xsl = """\
@@ -1176,7 +1184,7 @@ def test_style_to_csv():
@td.skip_if_no("lxml")
-def test_style_to_string():
+def test_style_to_string(geom_df):
xsl = """\
@@ -1210,7 +1218,7 @@ def test_style_to_string():
@td.skip_if_no("lxml")
-def test_style_to_json():
+def test_style_to_json(geom_df):
xsl = """\
@@ -1281,7 +1289,7 @@ def test_style_to_json():
"""
-def test_compression_output(parser, compression_only):
+def test_compression_output(parser, compression_only, geom_df):
with tm.ensure_clean() as path:
geom_df.to_xml(path, parser=parser, compression=compression_only)
@@ -1297,8 +1305,10 @@ def test_compression_output(parser, compression_only):
assert geom_xml == output.strip()
-def test_filename_and_suffix_comp(parser, compression_only):
- compfile = "xml." + _compression_to_extension[compression_only]
+def test_filename_and_suffix_comp(
+ parser, compression_only, geom_df, compression_to_extension
+):
+ compfile = "xml." + compression_to_extension[compression_only]
with tm.ensure_clean(filename=compfile) as path:
geom_df.to_xml(path, parser=parser, compression=compression_only)
@@ -1328,7 +1338,7 @@ def test_ea_dtypes(any_numeric_ea_dtype, parser):
assert equalize_decl(result).strip() == expected
-def test_unsuported_compression(parser):
+def test_unsuported_compression(parser, geom_df):
with pytest.raises(ValueError, match="Unrecognized compression type"):
with tm.ensure_clean() as path:
geom_df.to_xml(path, parser=parser, compression="7z")
@@ -1340,7 +1350,7 @@ def test_unsuported_compression(parser):
@pytest.mark.single_cpu
@td.skip_if_no("s3fs")
@td.skip_if_no("lxml")
-def test_s3_permission_output(parser, s3_resource):
+def test_s3_permission_output(parser, s3_resource, geom_df):
# s3_resource hosts pandas-test
import s3fs