Skip to content

Commit baaae40

Browse files
committed
_bool: implement support for postgres BOOLEAN array
- update typesystems, destinations - update pandas_columns, transports, and postgres - implement Vec<bool>, Option<Vec<bool>> for PostgresCSVSourceParser and PostgresSimpleSourceParser (parsing for `t` and `f` input) - modify `impl_arrow_assoc_vec` to take three arguments - had to change the macro since booleans use `MutableBooleanArray` - update `test_postgres.py`, `test_polars.rs`
1 parent e735e73 commit baaae40

File tree

12 files changed

+257
-19
lines changed

12 files changed

+257
-19
lines changed

connectorx-python/connectorx/tests/test_postgres.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,7 @@ def test_postgres_with_index_col(postgres_url: str) -> None:
450450

451451

452452
def test_postgres_types_binary(postgres_url: str) -> None:
453-
query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_json, test_jsonb, test_bytea, test_enum, test_f4array, test_f8array, test_narray, test_i2array, test_i4array, test_i8array, test_citext, test_ltree, test_lquery, test_ltxtquery FROM test_types"
453+
query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_json, test_jsonb, test_bytea, test_enum, test_f4array, test_f8array, test_narray, test_boolarray, test_i2array, test_i4array, test_i8array, test_citext, test_ltree, test_lquery, test_ltxtquery FROM test_types"
454454
df = read_sql(postgres_url, query)
455455
expected = pd.DataFrame(
456456
index=range(4),
@@ -538,6 +538,9 @@ def test_postgres_types_binary(postgres_url: str) -> None:
538538
"test_narray": pd.Series(
539539
[[], None, [521.34], [0.12, 333.33, 22.22]], dtype="object"
540540
),
541+
"test_boolarray": pd.Series(
542+
[[True, False], [], [True], None], dtype="object"
543+
),
541544
"test_i2array": pd.Series(
542545
[[-1, 0, 1], [], [-32768, 32767], None], dtype="object"
543546
),
@@ -560,7 +563,7 @@ def test_postgres_types_binary(postgres_url: str) -> None:
560563

561564

562565
def test_postgres_types_csv(postgres_url: str) -> None:
563-
query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_json, test_jsonb, test_bytea, test_enum::text, test_f4array, test_f8array, test_narray, test_i2array, test_i4array, test_i8array, test_citext, test_ltree FROM test_types"
566+
query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_json, test_jsonb, test_bytea, test_enum::text, test_f4array, test_f8array, test_narray, test_boolarray, test_i2array, test_i4array, test_i8array, test_citext, test_ltree FROM test_types"
564567
df = read_sql(postgres_url, query, protocol="csv")
565568
expected = pd.DataFrame(
566569
index=range(4),
@@ -648,6 +651,9 @@ def test_postgres_types_csv(postgres_url: str) -> None:
648651
"test_narray": pd.Series(
649652
[[], None, [521.34], [0.12, 333.33, 22.22]], dtype="object"
650653
),
654+
"test_boolarray": pd.Series(
655+
[[True, False], [], [True], None], dtype="object"
656+
),
651657
"test_i2array": pd.Series(
652658
[[-1, 0, 1], [], [-32768, 32767], None], dtype="object"
653659
),
@@ -666,7 +672,7 @@ def test_postgres_types_csv(postgres_url: str) -> None:
666672

667673

668674
def test_postgres_types_cursor(postgres_url: str) -> None:
669-
query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_json, test_jsonb, test_bytea, test_enum::text, test_f4array, test_f8array, test_narray, test_i2array, test_i4array, test_i8array, test_citext, test_ltree FROM test_types"
675+
query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_json, test_jsonb, test_bytea, test_enum::text, test_f4array, test_f8array, test_narray, test_boolarray, test_i2array, test_i4array, test_i8array, test_citext, test_ltree FROM test_types"
670676
df = read_sql(postgres_url, query, protocol="cursor")
671677
expected = pd.DataFrame(
672678
index=range(4),
@@ -754,6 +760,9 @@ def test_postgres_types_cursor(postgres_url: str) -> None:
754760
"test_narray": pd.Series(
755761
[[], None, [521.34], [0.12, 333.33, 22.22]], dtype="object"
756762
),
763+
"test_boolarray": pd.Series(
764+
[[True, False], [], [True], None], dtype="object"
765+
),
757766
"test_i2array": pd.Series(
758767
[[-1, 0, 1], [], [-32768, 32767], None], dtype="object"
759768
),
@@ -772,7 +781,7 @@ def test_postgres_types_cursor(postgres_url: str) -> None:
772781

773782

774783
def test_postgres_types_simple(postgres_url: str) -> None:
775-
query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_bytea, test_enum, test_f4array, test_f8array, test_narray, test_i2array, test_i4array, test_i8array FROM test_types"
784+
query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_bytea, test_enum, test_f4array, test_f8array, test_narray, test_boolarray, test_i2array, test_i4array, test_i8array FROM test_types"
776785
df = read_sql(postgres_url, query, protocol="simple")
777786
expected = pd.DataFrame(
778787
index=range(4),
@@ -842,6 +851,9 @@ def test_postgres_types_simple(postgres_url: str) -> None:
842851
"test_narray": pd.Series(
843852
[[], None, [521.34], [0.12, 333.33, 22.22]], dtype="object"
844853
),
854+
"test_boolarray": pd.Series(
855+
[[True, False], [], [True], None], dtype="object"
856+
),
845857
"test_i2array": pd.Series(
846858
[[-1, 0, 1], [], [-32768, 32767], None], dtype="object"
847859
),

connectorx-python/src/pandas/destination.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,9 @@ impl<'a> Destination for PandasDestination<'a> {
169169
PandasBlockType::Float64 => {
170170
self.allocate_array::<f64>(dt, placement)?;
171171
}
172+
PandasBlockType::BooleanArray => {
173+
self.allocate_array::<super::pandas_columns::PyList>(dt, placement)?;
174+
}
172175
PandasBlockType::Float64Array => {
173176
self.allocate_array::<super::pandas_columns::PyList>(dt, placement)?;
174177
}
@@ -219,6 +222,17 @@ impl<'a> Destination for PandasDestination<'a> {
219222
.collect()
220223
}
221224
}
225+
PandasBlockType::BooleanArray => {
226+
let bblock = ArrayBlock::<bool>::extract(buf)?;
227+
let bcols = bblock.split()?;
228+
for (&cid, bcol) in block.cids.iter().zip_eq(bcols) {
229+
partitioned_columns[cid] = bcol
230+
.partition(counts)
231+
.into_iter()
232+
.map(|c| Box::new(c) as _)
233+
.collect()
234+
}
235+
}
222236
PandasBlockType::Float64Array => {
223237
let fblock = ArrayBlock::<f64>::extract(buf)?;
224238
let fcols = fblock.split()?;

connectorx-python/src/pandas/pandas_columns/array.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,33 @@ where
9494
}
9595
}
9696

97+
impl PandasColumn<Vec<bool>> for ArrayColumn<bool> {
98+
#[throws(ConnectorXPythonError)]
99+
fn write(&mut self, val: Vec<bool>, row: usize) {
100+
self.lengths.push(val.len());
101+
self.buffer.extend_from_slice(&val[..]);
102+
self.row_idx.push(row);
103+
self.try_flush()?;
104+
}
105+
}
106+
107+
impl PandasColumn<Option<Vec<bool>>> for ArrayColumn<bool> {
108+
#[throws(ConnectorXPythonError)]
109+
fn write(&mut self, val: Option<Vec<bool>>, row: usize) {
110+
match val {
111+
Some(v) => {
112+
self.lengths.push(v.len());
113+
self.buffer.extend_from_slice(&v[..]);
114+
self.row_idx.push(row);
115+
self.try_flush()?;
116+
}
117+
None => {
118+
self.lengths.push(usize::MAX);
119+
self.row_idx.push(row);
120+
}
121+
}
122+
}
123+
}
97124
impl PandasColumn<Vec<f64>> for ArrayColumn<f64> {
98125
#[throws(ConnectorXPythonError)]
99126
fn write(&mut self, val: Vec<f64>, row: usize) {
@@ -150,6 +177,14 @@ impl PandasColumn<Option<Vec<i64>>> for ArrayColumn<i64> {
150177
}
151178
}
152179

180+
impl HasPandasColumn for Vec<bool> {
181+
type PandasColumn<'a> = ArrayColumn<bool>;
182+
}
183+
184+
impl HasPandasColumn for Option<Vec<bool>> {
185+
type PandasColumn<'a> = ArrayColumn<bool>;
186+
}
187+
153188
impl HasPandasColumn for Vec<f64> {
154189
type PandasColumn<'a> = ArrayColumn<f64>;
155190
}

connectorx-python/src/pandas/transports/postgres.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ macro_rules! impl_postgres_transport {
3333
{ Int2[i16] => I64[i64] | conversion auto }
3434
{ Int4[i32] => I64[i64] | conversion auto }
3535
{ Int8[i64] => I64[i64] | conversion auto }
36+
{ BoolArray[Vec<bool>] => BoolArray[Vec<bool>] | conversion auto_vec }
3637
{ Int2Array[Vec<i16>] => I64Array[Vec<i64>] | conversion auto_vec }
3738
{ Int4Array[Vec<i32>] => I64Array[Vec<i64>] | conversion auto_vec }
3839
{ Int8Array[Vec<i64>] => I64Array[Vec<i64>] | conversion auto }

connectorx-python/src/pandas/typesystem.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub enum PandasTypeSystem {
99
F64Array(bool),
1010
I64Array(bool),
1111
Bool(bool),
12+
BoolArray(bool),
1213
Char(bool),
1314
Str(bool),
1415
BoxStr(bool),
@@ -23,6 +24,7 @@ pub enum PandasBlockType {
2324
Boolean(bool), // bool indicates nullablity
2425
Int64(bool),
2526
Float64,
27+
BooleanArray,
2628
Int64Array,
2729
Float64Array,
2830
String,
@@ -54,6 +56,7 @@ impl From<PandasTypeSystem> for PandasBlockType {
5456
PandasTypeSystem::Bool(nullable) => PandasBlockType::Boolean(nullable),
5557
PandasTypeSystem::I64(nullable) => PandasBlockType::Int64(nullable),
5658
PandasTypeSystem::F64(_) => PandasBlockType::Float64,
59+
PandasTypeSystem::BoolArray(_) => PandasBlockType::BooleanArray,
5760
PandasTypeSystem::F64Array(_) => PandasBlockType::Float64Array,
5861
PandasTypeSystem::I64Array(_) => PandasBlockType::Int64Array,
5962
PandasTypeSystem::String(_)
@@ -74,6 +77,7 @@ impl_typesystem! {
7477
{ F64Array => Vec<f64> }
7578
{ I64Array => Vec<i64> }
7679
{ Bool => bool }
80+
{ BoolArray => Vec<bool> }
7781
{ Char => char }
7882
{ Str => &'r str }
7983
{ BoxStr => Box<str> }

connectorx/src/destinations/arrow2/arrow_assoc.rs

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@ impl_arrow_assoc!(f64, ArrowDataType::Float64, MutablePrimitiveArray<f64>);
6262
impl_arrow_assoc!(bool, ArrowDataType::Boolean, MutableBooleanArray);
6363

6464
macro_rules! impl_arrow_assoc_vec {
65-
($T:ty, $AT:expr) => {
65+
($T:ty, $PT:ty, $AT:expr) => {
6666
impl ArrowAssoc for Vec<$T> {
67-
type Builder = MutableListArray<i64, MutablePrimitiveArray<$T>>;
67+
type Builder = MutableListArray<i64, $PT>;
6868

6969
fn builder(nrows: usize) -> Self::Builder {
70-
MutableListArray::<i64, MutablePrimitiveArray<$T>>::with_capacity(nrows)
70+
MutableListArray::<i64, $PT>::with_capacity(nrows)
7171
}
7272

7373
#[inline]
@@ -86,10 +86,10 @@ macro_rules! impl_arrow_assoc_vec {
8686
}
8787

8888
impl ArrowAssoc for Option<Vec<$T>> {
89-
type Builder = MutableListArray<i64, MutablePrimitiveArray<$T>>;
89+
type Builder = MutableListArray<i64, $PT>;
9090

9191
fn builder(nrows: usize) -> Self::Builder {
92-
MutableListArray::<i64, MutablePrimitiveArray<$T>>::with_capacity(nrows)
92+
MutableListArray::<i64, $PT>::with_capacity(nrows)
9393
}
9494

9595
#[inline]
@@ -114,12 +114,19 @@ macro_rules! impl_arrow_assoc_vec {
114114
};
115115
}
116116

117-
impl_arrow_assoc_vec!(i32, ArrowDataType::Int32);
118-
impl_arrow_assoc_vec!(i64, ArrowDataType::Int64);
119-
impl_arrow_assoc_vec!(u32, ArrowDataType::UInt32);
120-
impl_arrow_assoc_vec!(u64, ArrowDataType::UInt64);
121-
impl_arrow_assoc_vec!(f32, ArrowDataType::Float32);
122-
impl_arrow_assoc_vec!(f64, ArrowDataType::Float64);
117+
macro_rules! impl_arrow_assoc_primitive_vec {
118+
($T:ty, $AT:expr) => {
119+
impl_arrow_assoc_vec!($T, MutablePrimitiveArray<$T>, $AT);
120+
};
121+
}
122+
123+
impl_arrow_assoc_vec!(bool, MutableBooleanArray, ArrowDataType::Boolean);
124+
impl_arrow_assoc_primitive_vec!(i32, ArrowDataType::Int32);
125+
impl_arrow_assoc_primitive_vec!(i64, ArrowDataType::Int64);
126+
impl_arrow_assoc_primitive_vec!(u32, ArrowDataType::UInt32);
127+
impl_arrow_assoc_primitive_vec!(u64, ArrowDataType::UInt64);
128+
impl_arrow_assoc_primitive_vec!(f32, ArrowDataType::Float32);
129+
impl_arrow_assoc_primitive_vec!(f64, ArrowDataType::Float64);
123130

124131
impl ArrowAssoc for &str {
125132
type Builder = MutableUtf8Array<i64>;

connectorx/src/destinations/arrow2/typesystem.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ pub enum Arrow2TypeSystem {
1616
Date64(bool),
1717
Time64(bool),
1818
DateTimeTz(bool),
19+
BoolArray(bool),
1920
Int32Array(bool),
2021
Int64Array(bool),
2122
UInt32Array(bool),
@@ -41,6 +42,7 @@ impl_typesystem! {
4142
{ Date64 => NaiveDateTime }
4243
{ Time64 => NaiveTime }
4344
{ DateTimeTz => DateTime<Utc> }
45+
{ BoolArray => Vec<bool> }
4446
{ Int32Array => Vec<i32> }
4547
{ Int64Array => Vec<i64> }
4648
{ UInt32Array => Vec<u32> }

0 commit comments

Comments
 (0)