Skip to content

Commit 00ae98d

Browse files
PERF: faster placement creating extension blocks from arrays (#32856)
1 parent 87cf295 commit 00ae98d

File tree

3 files changed: +54 −5 lines changed

asv_bench/benchmarks/frame_ctor.py

+45
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import numpy as np
22

3+
import pandas as pd
34
from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range
45

56
from .pandas_vb_common import tm
@@ -118,4 +119,48 @@ def time_frame_from_range(self):
118119
self.df = DataFrame(self.data)
119120

120121

122+
class FromArrays:
    """
    Benchmarks for ``DataFrame._from_arrays`` given a list of column arrays.

    ``setup`` prepares three lists of columns (NumPy float arrays,
    ``SparseArray`` columns and ``"Int64"``-dtype arrays) together with the
    row and column indexes; each ``time_*`` method builds a frame from one
    of those lists.
    """

    goal_time = 0.2

    def setup(self):
        """Create 1000 columns of 1000 rows for each array kind, plus axes."""
        n_rows = 1000
        n_cols = 1000

        float_columns = []
        for _ in range(n_cols):
            float_columns.append(np.random.randn(n_rows))
        self.float_arrays = float_columns

        sparse_columns = []
        for _ in range(n_cols):
            # Random 0/1 integers, stored sparsely with a float64 dtype.
            zeros_and_ones = np.random.randint(0, 2, n_rows)
            sparse_columns.append(
                pd.arrays.SparseArray(zeros_and_ones, dtype="float64")
            )
        self.sparse_arrays = sparse_columns

        int_columns = []
        for _ in range(n_cols):
            raw_ints = np.random.randint(1000, size=n_rows)
            int_columns.append(pd.array(raw_ints, dtype="Int64"))
        self.int_arrays = int_columns

        self.index = pd.Index(range(n_rows))
        self.columns = pd.Index(range(n_cols))

    def time_frame_from_arrays_float(self):
        """Time building a frame from the NumPy float columns."""
        frame = DataFrame._from_arrays(
            self.float_arrays,
            index=self.index,
            columns=self.columns,
            verify_integrity=False,
        )
        self.df = frame

    def time_frame_from_arrays_int(self):
        """Time building a frame from the ``"Int64"``-dtype columns."""
        frame = DataFrame._from_arrays(
            self.int_arrays,
            index=self.index,
            columns=self.columns,
            verify_integrity=False,
        )
        self.df = frame

    def time_frame_from_arrays_sparse(self):
        """Time building a frame from the ``SparseArray`` columns."""
        frame = DataFrame._from_arrays(
            self.sparse_arrays,
            index=self.index,
            columns=self.columns,
            verify_integrity=False,
        )
        self.df = frame
164+
165+
121166
from .pandas_vb_common import setup # noqa: F401 isort:skip

pandas/_libs/internals.pyx

+5-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ cdef class BlockPlacement:
3232
self._has_slice = False
3333
self._has_array = False
3434

35-
if isinstance(val, slice):
35+
if isinstance(val, int):
36+
slc = slice(val, val + 1, 1)
37+
self._as_slice = slc
38+
self._has_slice = True
39+
elif isinstance(val, slice):
3640
slc = slice_canonize(val)
3741

3842
if slc.start != slc.stop:

pandas/core/internals/managers.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1765,7 +1765,7 @@ def form_blocks(arrays, names, axes):
17651765

17661766
if len(items_dict["DatetimeTZBlock"]):
17671767
dttz_blocks = [
1768-
make_block(array, klass=DatetimeTZBlock, placement=[i])
1768+
make_block(array, klass=DatetimeTZBlock, placement=i)
17691769
for i, _, array in items_dict["DatetimeTZBlock"]
17701770
]
17711771
blocks.extend(dttz_blocks)
@@ -1780,23 +1780,23 @@ def form_blocks(arrays, names, axes):
17801780

17811781
if len(items_dict["CategoricalBlock"]) > 0:
17821782
cat_blocks = [
1783-
make_block(array, klass=CategoricalBlock, placement=[i])
1783+
make_block(array, klass=CategoricalBlock, placement=i)
17841784
for i, _, array in items_dict["CategoricalBlock"]
17851785
]
17861786
blocks.extend(cat_blocks)
17871787

17881788
if len(items_dict["ExtensionBlock"]):
17891789

17901790
external_blocks = [
1791-
make_block(array, klass=ExtensionBlock, placement=[i])
1791+
make_block(array, klass=ExtensionBlock, placement=i)
17921792
for i, _, array in items_dict["ExtensionBlock"]
17931793
]
17941794

17951795
blocks.extend(external_blocks)
17961796

17971797
if len(items_dict["ObjectValuesExtensionBlock"]):
17981798
external_blocks = [
1799-
make_block(array, klass=ObjectValuesExtensionBlock, placement=[i])
1799+
make_block(array, klass=ObjectValuesExtensionBlock, placement=i)
18001800
for i, _, array in items_dict["ObjectValuesExtensionBlock"]
18011801
]
18021802

0 commit comments

Comments
 (0)