diff --git a/Cargo.lock b/Cargo.lock index 5d5765fae3..056006ab9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -245,16 +245,6 @@ dependencies = [ "num", ] -[[package]] -name = "arrow-format" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07884ea216994cdc32a2d5f8274a8bee979cfe90274b83f86f440866ee3132c7" -dependencies = [ - "planus", - "serde", -] - [[package]] name = "arrow-ipc" version = "53.4.0" @@ -356,36 +346,7 @@ dependencies = [ "memchr", "num", "regex", - "regex-syntax 0.8.5", -] - -[[package]] -name = "arrow2" -version = "0.17.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59c468daea140b747d781a1da9f7db5f0a8e6636d4af20cc539e43d05b0604fa" -dependencies = [ - "ahash 0.8.11", - "arrow-format", - "bytemuck", - "chrono", - "dyn-clone", - "either", - "ethnum", - "foreign_vec", - "futures", - "getrandom 0.2.15", - "hash_hasher", - "lexical-core 0.8.5", - "lz4", - "multiversion", - "num-traits", - "regex", - "regex-syntax 0.6.29", - "rustc_version", - "simdutf8", - "strength_reduce", - "zstd 0.12.4", + "regex-syntax", ] [[package]] @@ -401,8 +362,8 @@ dependencies = [ "pin-project-lite", "tokio", "xz2", - "zstd 0.13.2", - "zstd-safe 7.2.1", + "zstd", + "zstd-safe", ] [[package]] @@ -699,9 +660,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "4.0.1" +version = "4.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" +checksum = "74fa05ad7d803d413eb8380983b092cbbaf9a85f151b871360e7b00cd7060b37" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -958,7 +919,7 @@ checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" dependencies = [ "crossterm", "strum", - "strum_macros 0.26.4", + "strum_macros", "unicode-width 0.2.0", ] @@ -992,7 +953,6 @@ version = "0.4.1-alpha1" dependencies = [ "anyhow", "arrow", - "arrow2", "bb8", "bb8-tiberius", "chrono", @@ -1015,9 +975,8 @@ dependencies = [ "openssl", "oracle", "owning_ref", - "polars 0.32.1", - "polars 0.45.1", - "polars-arrow 0.45.1", + "polars", + "polars-arrow", "postgres", "postgres-native-tls", "postgres-openssl", @@ -1377,9 +1336,9 @@ dependencies = [ "tokio", "tokio-util 0.7.13", "url", - "uuid 1.12.0", + "uuid 1.12.1", "xz2", - "zstd 0.13.2", + "zstd", ] [[package]] @@ -1516,7 +1475,7 @@ dependencies = [ "regex", "sha2", "unicode-segmentation", - "uuid 1.12.0", + "uuid 1.12.1", ] [[package]] @@ -1645,7 +1604,7 @@ dependencies = [ "log", "recursive", "regex", - "regex-syntax 0.8.5", + "regex-syntax", ] [[package]] @@ -2017,12 +1976,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" -[[package]] -name = "fast-float" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" - [[package]] name = "fast-float2" version = "0.2.3" @@ -2109,12 +2062,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" -[[package]] -name = "foreign_vec" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" - [[package]] name = "form_urlencoded" version = "1.2.1" @@ -2384,12 +2331,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "hash_hasher" -version = "2.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" - [[package]] name = "hashbrown" version = "0.12.3" @@ -2433,12 +2374,6 @@ dependencies = [ "hashbrown 0.14.5", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -3251,15 +3186,6 @@ dependencies = [ "libc", ] -[[package]] -name = "memmap2" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" -dependencies = [ - "libc", -] - [[package]] name = "memmap2" version = "0.9.5" @@ -3393,7 +3319,7 @@ dependencies = [ "subprocess", "thiserror 1.0.69", "time", - "uuid 1.12.0", + "uuid 1.12.1", ] [[package]] @@ -3787,7 +3713,7 @@ dependencies = [ "thrift", "tokio", "twox-hash", - "zstd 0.13.2", + "zstd", "zstd-sys", ] @@ -3930,22 +3856,6 @@ dependencies = [ "plotters-backend", ] -[[package]] -name = "polars" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1362d4a136c0ebacb40d88a37ba361738b222fd8a2ee9340a3d8642f698c52b" -dependencies = [ - "getrandom 0.2.15", - "polars-core 0.32.1", - "polars-io 0.32.1", - "polars-lazy 0.32.1", - "polars-ops 0.32.1", - "polars-sql 0.32.1", - "polars-time 0.32.1", - "version_check", -] - [[package]] name = "polars" version = "0.45.1" @@ -3953,31 +3863,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c0af18ae021b0396c42f39396146332957ebc4d4d25d931b4fe73509948f348" dependencies = [ "getrandom 0.2.15", - "polars-arrow 0.45.1", - "polars-core 0.45.1", - "polars-error 0.45.1", - "polars-io 0.45.1", - "polars-lazy 0.45.1", - "polars-ops 0.45.1", + "polars-arrow", + "polars-core", + "polars-error", + "polars-io", + "polars-lazy", + "polars-ops", "polars-parquet", - "polars-sql 0.45.1", - "polars-time 0.45.1", - "polars-utils 0.45.1", - "version_check", -] - -[[package]] -name = "polars-arrow" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f967c901fa5da4ca7f64e813d1268488ba97e9b3004cefc579ff851c197a1138" -dependencies = [ - "arrow2", - "hashbrown 0.14.5", - "multiversion", - "num-traits", - "polars-error 0.32.1", - "thiserror 1.0.69", + "polars-sql", + "polars-time", + "polars-utils", "version_check", ] @@ -4003,15 +3898,15 @@ dependencies = [ "num-traits", "parking_lot 0.12.3", "polars-arrow-format", - "polars-error 0.45.1", + "polars-error", "polars-schema", - "polars-utils 0.45.1", + "polars-utils", "simdutf8", "streaming-iterator", "strength_reduce", - "strum_macros 0.26.4", + "strum_macros", "version_check", - "zstd 0.13.2", + "zstd", ] [[package]] @@ -4038,44 +3933,14 @@ dependencies = [ "itoa", "itoap", "num-traits", - "polars-arrow 0.45.1", - "polars-error 0.45.1", - "polars-utils 0.45.1", + "polars-arrow", + "polars-error", + "polars-utils", "ryu", "strength_reduce", "version_check", ] -[[package]] -name = "polars-core" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b24f92fc5b167f668ff85ab9607dfa72e2c09664cacef59297ee8601dee60126" -dependencies = [ - "ahash 0.8.11", - "arrow2", - "bitflags 2.8.0", - "chrono", - "comfy-table", - "either", - "hashbrown 0.14.5", - "indexmap 2.7.1", - "num-traits", - "once_cell", - "polars-arrow 0.32.1", - "polars-error 0.32.1", - "polars-row 0.32.1", - "polars-utils 0.32.1", - "rand 0.8.5", - "rand_distr", - "rayon", - "regex", - "smartstring", - "thiserror 1.0.69", - "version_check", - "xxhash-rust", -] - [[package]] name = "polars-core" version = "0.45.1" @@ -4095,33 +3960,22 @@ dependencies = [ "itoa", "num-traits", "once_cell", - "polars-arrow 0.45.1", + "polars-arrow", "polars-compute", - "polars-error 0.45.1", - "polars-row 0.45.1", + "polars-error", + "polars-row", "polars-schema", - "polars-utils 0.45.1", + "polars-utils", "rand 0.8.5", "rand_distr", "rayon", "regex", - "strum_macros 0.26.4", + "strum_macros", "thiserror 2.0.11", "version_check", "xxhash-rust", ] -[[package]] -name = "polars-error" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40d09c3a7337e53b38c37b57999038440fa39c6801b9ba48afaecd8e16f7ac0a" -dependencies = [ - "arrow2", - "regex", - "thiserror 1.0.69", -] - [[package]] name = "polars-error" version = "0.45.1" @@ -4145,50 +3999,19 @@ dependencies = [ "hashbrown 0.15.2", "num-traits", "once_cell", - "polars-arrow 0.45.1", + "polars-arrow", "polars-compute", - "polars-core 0.45.1", - "polars-io 0.45.1", - "polars-ops 0.45.1", - "polars-plan 0.45.1", - "polars-row 0.45.1", - "polars-time 0.45.1", - "polars-utils 0.45.1", + "polars-core", + "polars-io", + "polars-ops", + "polars-plan", + "polars-row", + "polars-time", + "polars-utils", "rand 0.8.5", "rayon", ] -[[package]] -name = "polars-io" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92cab0df9f2a35702fa5aec99edfaabf9ae8e9cdd0acf69e143ad2d132f34f9c" -dependencies = [ - "ahash 0.8.11", - "arrow2", - "async-trait", - "bytes", - "chrono", - "fast-float", - "futures", - "home", - "lexical", - "lexical-core 0.8.5", - "memchr", - "memmap2 0.7.1", - "num-traits", - "once_cell", - "polars-arrow 0.32.1", - "polars-core 0.32.1", - "polars-error 0.32.1", - "polars-time 0.32.1", - "polars-utils 0.32.1", - "rayon", - "regex", - "simdutf8", - "tokio", -] - [[package]] name = "polars-io" version = "0.45.1" @@ -4211,13 +4034,13 @@ dependencies = [ "num-traits", "once_cell", "percent-encoding", - "polars-arrow 0.45.1", - "polars-core 0.45.1", - "polars-error 0.45.1", + "polars-arrow", + "polars-core", + "polars-error", "polars-parquet", "polars-schema", - "polars-time 0.45.1", - "polars-utils 0.45.1", + "polars-time", + "polars-utils", "rayon", "regex", "ryu", @@ -4226,29 +4049,6 @@ dependencies = [ "tokio-util 0.7.13", ] -[[package]] -name = "polars-lazy" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c33762ec2a55e01c9f8776b34db86257c70a0a3b3929bd4eb91a52aacf61456" -dependencies = [ - "ahash 0.8.11", - "bitflags 2.8.0", - "glob", - "once_cell", - "polars-arrow 0.32.1", - "polars-core 0.32.1", - "polars-io 0.32.1", - "polars-ops 0.32.1", - "polars-pipe 0.32.1", - "polars-plan 0.32.1", - "polars-time 0.32.1", - "polars-utils 0.32.1", - "rayon", - "smartstring", - "version_check", -] - [[package]] name = "polars-lazy" version = "0.45.1" @@ -4259,17 +4059,17 @@ dependencies = [ "bitflags 2.8.0", "memchr", "once_cell", - "polars-arrow 0.45.1", - "polars-core 0.45.1", + "polars-arrow", + "polars-core", "polars-expr", - "polars-io 0.45.1", + "polars-io", "polars-mem-engine", - "polars-ops 0.45.1", - "polars-pipe 0.45.1", - "polars-plan 0.45.1", + "polars-ops", + "polars-pipe", + "polars-plan", "polars-stream", - "polars-time 0.45.1", - "polars-utils 0.45.1", + "polars-time", + "polars-utils", "rayon", "version_check", ] @@ -4281,36 +4081,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f73aa56fc0a4c1e9d56b4a4485800f4780ca214030d32d0150eccc44f71d6dab" dependencies = [ "memmap2 0.9.5", - "polars-arrow 0.45.1", - "polars-core 0.45.1", - "polars-error 0.45.1", + "polars-arrow", + "polars-core", + "polars-error", "polars-expr", - "polars-io 0.45.1", - "polars-ops 0.45.1", - "polars-plan 0.45.1", - "polars-time 0.45.1", - "polars-utils 0.45.1", + "polars-io", + "polars-ops", + "polars-plan", + "polars-time", + "polars-utils", "rayon", ] -[[package]] -name = "polars-ops" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e825575c96302d2daedfc205a0062180033c92c55bcd6aafc4e109d4d8849ed0" -dependencies = [ - "argminmax", - "arrow2", - "either", - "indexmap 2.7.1", - "memchr", - "polars-arrow 0.32.1", - "polars-core 0.32.1", - "polars-utils 0.32.1", - "smartstring", - "version_check", -] - [[package]] name = "polars-ops" version = "0.45.1" @@ -4329,16 +4111,16 @@ dependencies = [ "indexmap 2.7.1", "memchr", "num-traits", - "polars-arrow 0.45.1", + "polars-arrow", "polars-compute", - "polars-core 0.45.1", - "polars-error 0.45.1", + "polars-core", + "polars-error", "polars-schema", - "polars-utils 0.45.1", + "polars-utils", "rayon", "regex", - "regex-syntax 0.8.5", - "strum_macros 0.26.4", + "regex-syntax", + "strum_macros", "unicode-reverse", "version_check", ] @@ -4357,11 +4139,11 @@ dependencies = [ "futures", "hashbrown 0.15.2", "num-traits", - "polars-arrow 0.45.1", + "polars-arrow", "polars-compute", - "polars-error 0.45.1", + "polars-error", "polars-parquet-format", - "polars-utils 0.45.1", + "polars-utils", "simdutf8", "streaming-decompression", ] @@ -4376,27 +4158,6 @@ dependencies = [ "futures", ] -[[package]] -name = "polars-pipe" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f2bc9a12da9ed043fb0cb51dbcb87b365e4845b7ab6399d7a81e838460c6974" -dependencies = [ - "enum_dispatch", - "hashbrown 0.14.5", - "num-traits", - "polars-arrow 0.32.1", - "polars-core 0.32.1", - "polars-io 0.32.1", - "polars-ops 0.32.1", - "polars-plan 0.32.1", - "polars-row 0.32.1", - "polars-utils 0.32.1", - "rayon", - "smartstring", - "version_check", -] - [[package]] name = "polars-pipe" version = "0.45.1" @@ -4408,39 +4169,17 @@ dependencies = [ "enum_dispatch", "hashbrown 0.15.2", "num-traits", - "polars-arrow 0.45.1", + "polars-arrow", "polars-compute", - "polars-core 0.45.1", + "polars-core", "polars-expr", - "polars-io 0.45.1", - "polars-ops 0.45.1", - "polars-plan 0.45.1", - "polars-row 0.45.1", - "polars-utils 0.45.1", + "polars-io", + "polars-ops", + "polars-plan", + "polars-row", + "polars-utils", "rayon", - "uuid 1.12.0", - "version_check", -] - -[[package]] -name = "polars-plan" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb67b014f0295e8e9dbb84404a91d666d477b3bc248a2ed51bc442833b16da35" -dependencies = [ - "ahash 0.8.11", - "arrow2", - "once_cell", - "polars-arrow 0.32.1", - "polars-core 0.32.1", - "polars-io 0.32.1", - "polars-ops 0.32.1", - "polars-time 0.32.1", - "polars-utils 0.32.1", - "rayon", - "regex", - "smartstring", - "strum_macros 0.25.3", + "uuid 1.12.1", "version_check", ] @@ -4462,31 +4201,20 @@ dependencies = [ "num-traits", "once_cell", "percent-encoding", - "polars-arrow 0.45.1", + "polars-arrow", "polars-compute", - "polars-core 0.45.1", - "polars-io 0.45.1", - "polars-ops 0.45.1", - "polars-time 0.45.1", - "polars-utils 0.45.1", + "polars-core", + "polars-io", + "polars-ops", + "polars-time", + "polars-utils", "rayon", "recursive", "regex", - "strum_macros 0.26.4", + "strum_macros", "version_check", ] -[[package]] -name = "polars-row" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27f54c1956027bf6301948fb4f2837cf6d6b638d8dd1edf3aaeaa19906a986be" -dependencies = [ - "arrow2", - "polars-error 0.32.1", - "polars-utils 0.32.1", -] - [[package]] name = "polars-row" version = "0.45.1" @@ -4495,10 +4223,10 @@ checksum = "8e36350fb8a90238e02c8ece0f0c4c24f3374197e9c08c1c22cc8b9c526e6c25" dependencies = [ "bitflags 2.8.0", "bytemuck", - "polars-arrow 0.45.1", + "polars-arrow", "polars-compute", - "polars-error 0.45.1", - "polars-utils 0.45.1", + "polars-error", + "polars-utils", ] [[package]] @@ -4508,26 +4236,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c6aa4913cffc522cea3ccbc0cafb350bec18fed0a1ef8d417ac88ea320d7749" dependencies = [ "indexmap 2.7.1", - "polars-error 0.45.1", - "polars-utils 0.45.1", + "polars-error", + "polars-utils", "version_check", ] -[[package]] -name = "polars-sql" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbfcb15cf8eebd25ea1724109d0153817cd484c6326290585f0736b4e7fcf2f4" -dependencies = [ - "polars-arrow 0.32.1", - "polars-core 0.32.1", - "polars-lazy 0.32.1", - "polars-plan 0.32.1", - "serde", - "serde_json", - "sqlparser 0.36.1", -] - [[package]] name = "polars-sql" version = "0.45.1" @@ -4536,14 +4249,14 @@ checksum = "c62a2247028629b1db384437a9f2792488f0ddb539ec16fb46a5e2bceeba6dbc" dependencies = [ "hex", "once_cell", - "polars-arrow 0.45.1", - "polars-core 0.45.1", - "polars-error 0.45.1", - "polars-lazy 0.45.1", - "polars-ops 0.45.1", - "polars-plan 0.45.1", - "polars-time 0.45.1", - "polars-utils 0.45.1", + "polars-arrow", + "polars-core", + "polars-error", + "polars-lazy", + "polars-ops", + "polars-plan", + "polars-time", + "polars-utils", "rand 0.8.5", "serde", "serde_json", @@ -4563,15 +4276,15 @@ dependencies = [ "memmap2 0.9.5", "parking_lot 0.12.3", "pin-project-lite", - "polars-core 0.45.1", - "polars-error 0.45.1", + "polars-core", + "polars-error", "polars-expr", - "polars-io 0.45.1", + "polars-io", "polars-mem-engine", - "polars-ops 0.45.1", + "polars-ops", "polars-parquet", - "polars-plan 0.45.1", - "polars-utils 0.45.1", + "polars-plan", + "polars-utils", "rand 0.8.5", "rayon", "recursive", @@ -4580,25 +4293,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "polars-time" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f42d2632f5971c9575041d33cbcfb1f996900c40bbf58bc6eb0a0c5efbecea" -dependencies = [ - "arrow2", - "atoi", - "chrono", - "now", - "once_cell", - "polars-arrow 0.32.1", - "polars-core 0.32.1", - "polars-ops 0.32.1", - "polars-utils 0.32.1", - "regex", - "smartstring", -] - [[package]] name = "polars-time" version = "0.45.1" @@ -4611,31 +4305,14 @@ dependencies = [ "chrono-tz 0.10.1", "now", "once_cell", - "polars-arrow 0.45.1", + "polars-arrow", "polars-compute", - "polars-core 0.45.1", - "polars-error 0.45.1", - "polars-ops 0.45.1", - "polars-utils 0.45.1", + "polars-core", + "polars-error", + "polars-ops", + "polars-utils", "regex", - "strum_macros 0.26.4", -] - -[[package]] -name = "polars-utils" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c326708a370d71dc6e11a8f4bbc10a8479e1c314dc048ba73543b815cd0bf339" -dependencies = [ - "ahash 0.8.11", - "hashbrown 0.14.5", - "num-traits", - "once_cell", - "polars-error 0.32.1", - "rayon", - "smartstring", - "sysinfo 0.29.11", - "version_check", + "strum_macros", ] [[package]] @@ -4654,12 +4331,12 @@ dependencies = [ "memmap2 0.9.5", "num-traits", "once_cell", - "polars-error 0.45.1", + "polars-error", "rand 0.8.5", "raw-cpuid", "rayon", "stacker", - "sysinfo 0.32.1", + "sysinfo", "version_check", ] @@ -4817,7 +4494,7 @@ dependencies = [ "thiserror 1.0.69", "tokio", "urlencoding", - "uuid 1.12.0", + "uuid 1.12.1", ] [[package]] @@ -4927,7 +4604,7 @@ checksum = "4dc290b669d30e20751e813517bbe13662d020419c5c8818ff10b6e8bb7777f6" dependencies = [ "r2d2", "rusqlite", - "uuid 1.12.0", + "uuid 1.12.1", ] [[package]] @@ -5104,7 +4781,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax 0.8.5", + "regex-syntax", ] [[package]] @@ -5115,15 +4792,9 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.5", + "regex-syntax", ] -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - [[package]] name = "regex-syntax" version = "0.8.5" @@ -5234,7 +4905,7 @@ dependencies = [ "rkyv_derive", "seahash", "tinyvec", - "uuid 1.12.0", + "uuid 1.12.1", ] [[package]] @@ -5313,9 +4984,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.43" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ "bitflags 2.8.0", "errno", @@ -5617,17 +5288,6 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" -[[package]] -name = "smartstring" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" -dependencies = [ - "autocfg", - "static_assertions", - "version_check", -] - [[package]] name = "snafu" version = "0.8.5" @@ -5643,7 +5303,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn 2.0.96", @@ -5687,15 +5347,6 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -[[package]] -name = "sqlparser" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" -dependencies = [ - "log", -] - [[package]] name = "sqlparser" version = "0.37.0" @@ -5816,26 +5467,13 @@ version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -[[package]] -name = "strum_macros" -version = "0.25.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.96", -] - [[package]] name = "strum_macros" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "rustversion", @@ -5920,20 +5558,6 @@ dependencies = [ "syn 2.0.96", ] -[[package]] -name = "sysinfo" -version = "0.29.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666" -dependencies = [ - "cfg-if", - "core-foundation-sys", - "libc", - "ntapi", - "once_cell", - "winapi", -] - [[package]] name = "sysinfo" version = "0.32.1" @@ -6489,9 +6113,9 @@ checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" [[package]] name = "uuid" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4" +checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" dependencies = [ "getrandom 0.2.15", "rand 0.8.5", @@ -7107,32 +6731,13 @@ dependencies = [ "syn 2.0.96", ] -[[package]] -name = "zstd" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" -dependencies = [ - "zstd-safe 6.0.6", -] - [[package]] name = "zstd" version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" dependencies = [ - "zstd-safe 7.2.1", -] - -[[package]] -name = "zstd-safe" -version = "6.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" -dependencies = [ - "libc", - "zstd-sys", + "zstd-safe", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 290c48796f..763bfc9495 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,5 @@ lto = true [workspace.dependencies] arrow = {version = "53", features = ["prettyprint", "ffi"]} -arrow2 = {version = "0.17", default-features = false} polars = {version = "0.45", features=["dtype-u8", "dtype-u16", "lazy"]} polars-arrow = {version = "0.45"} \ No newline at end of file diff --git a/Justfile b/Justfile index 38197eb6fc..c3d730b758 100644 --- a/Justfile +++ b/Justfile @@ -28,7 +28,6 @@ test-feature-gate: cargo c --features src_dummy cargo c --features src_trino cargo c --features dst_arrow - cargo c --features dst_arrow2 bootstrap-python: cd connectorx-python && poetry install diff --git a/connectorx-cpp/Cargo.toml b/connectorx-cpp/Cargo.toml index 823c59c8f0..08c5bb57ee 100644 --- a/connectorx-cpp/Cargo.toml +++ b/connectorx-cpp/Cargo.toml @@ -22,7 +22,7 @@ light = ["fptr", "nbstr", "dsts_light" , "srcs_light", "federation"] srcs_light = ["connectorx/src_postgres"] dsts_light = ["connectorx/dst_arrow"] -dsts = ["connectorx/dst_arrow", "connectorx/dst_arrow2"] +dsts = ["connectorx/dst_arrow"] fptr = ["connectorx/fptr"] branch = ["connectorx/branch"] federation = ["connectorx/federation"] diff --git a/connectorx-python/Cargo.lock b/connectorx-python/Cargo.lock index 7ef0247b8c..9b147ff3f5 100644 --- a/connectorx-python/Cargo.lock +++ b/connectorx-python/Cargo.lock @@ -102,21 +102,6 @@ version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" -[[package]] -name = "argminmax" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa" -dependencies = [ - "num-traits", -] - -[[package]] -name = "array-init-cursor" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" - [[package]] name = "arrayref" version = "0.3.9" @@ -245,16 +230,6 @@ dependencies = [ "num", ] -[[package]] -name = "arrow-format" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07884ea216994cdc32a2d5f8274a8bee979cfe90274b83f86f440866ee3132c7" -dependencies = [ - "planus", - "serde", -] - [[package]] name = "arrow-ipc" version = "53.4.0" @@ -356,36 +331,7 @@ dependencies = [ "memchr", "num", "regex", - "regex-syntax 0.8.5", -] - -[[package]] -name = "arrow2" -version = "0.17.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59c468daea140b747d781a1da9f7db5f0a8e6636d4af20cc539e43d05b0604fa" -dependencies = [ - "ahash 0.8.11", - "arrow-format", - "bytemuck", - "chrono", - "dyn-clone", - "either", - "ethnum", - "foreign_vec", - "futures", - "getrandom 0.2.15", - "hash_hasher", - "lexical-core 0.8.5", - "lz4", - "multiversion", - "num-traits", - "regex", - "regex-syntax 0.6.29", - "rustc_version", - "simdutf8", - "strength_reduce", - "zstd 0.12.4", + "regex-syntax", ] [[package]] @@ -401,8 +347,8 @@ dependencies = [ "pin-project-lite", "tokio", "xz2", - "zstd 0.13.2", - "zstd-safe 7.2.1", + "zstd", + "zstd-safe", ] [[package]] @@ -524,22 +470,21 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bb8" -version = "0.7.1" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e9f4fa9768efd269499d8fba693260cfc670891cf6de3adc935588447a77cc8" +checksum = "d89aabfae550a5c44b43ab941844ffcd2e993cb6900b342debf59e9ea74acdb8" dependencies = [ "async-trait", - "futures-channel", "futures-util", - "parking_lot 0.11.2", + "parking_lot 0.12.3", "tokio", ] [[package]] name = "bb8-tiberius" -version = "0.8.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a33c87124c1938413e45ab6a6655e49d9e4cd015e05db61d6ab5a4f96b2c83" +checksum = "61780ef76db8989f8fd30d9a63fcb1d7d1b8d7df3b7d4662c09e66474877b0e6" dependencies = [ "async-trait", "bb8", @@ -694,9 +639,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "4.0.1" +version = "4.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" +checksum = "74fa05ad7d803d413eb8380983b092cbbaf9a85f151b871360e7b00cd7060b37" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -751,20 +696,6 @@ name = "bytemuck" version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" -dependencies = [ - "bytemuck_derive", -] - -[[package]] -name = "bytemuck_derive" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fa76293b4f7bb636ab88fd78228235b5248b4d05cc589aed610f954af5d7c7a" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.96", -] [[package]] name = "byteorder" @@ -964,9 +895,8 @@ version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" dependencies = [ - "crossterm", "strum", - "strum_macros 0.26.4", + "strum_macros", "unicode-width 0.2.0", ] @@ -985,7 +915,6 @@ version = "0.4.1-alpha1" dependencies = [ "anyhow", "arrow", - "arrow2", "bb8", "bb8-tiberius", "chrono", @@ -1005,7 +934,6 @@ dependencies = [ "openssl", "oracle", "owning_ref", - "polars", "postgres", "postgres-native-tls", "postgres-openssl", @@ -1037,7 +965,6 @@ version = "0.4.1-alpha1" dependencies = [ "anyhow", "arrow", - "arrow2", "bitfield", "built", "bytes", @@ -1251,28 +1178,6 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" -[[package]] -name = "crossterm" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" -dependencies = [ - "bitflags 2.8.0", - "crossterm_winapi", - "parking_lot 0.12.3", - "rustix", - "winapi", -] - -[[package]] -name = "crossterm_winapi" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" -dependencies = [ - "winapi", -] - [[package]] name = "crunchy" version = "0.2.2" @@ -1406,9 +1311,9 @@ dependencies = [ "tokio", "tokio-util 0.7.13", "url", - "uuid 1.12.0", + "uuid 1.12.1", "xz2", - "zstd 0.13.2", + "zstd", ] [[package]] @@ -1545,7 +1450,7 @@ dependencies = [ "regex", "sha2", "unicode-segmentation", - "uuid 1.12.0", + "uuid 1.12.1", ] [[package]] @@ -1674,7 +1579,7 @@ dependencies = [ "log", "recursive", "regex", - "regex-syntax 0.8.5", + "regex-syntax", ] [[package]] @@ -1876,12 +1781,6 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" -[[package]] -name = "dyn-clone" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" - [[package]] name = "either" version = "1.13.0" @@ -1961,18 +1860,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "enum_dispatch" -version = "0.3.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" -dependencies = [ - "once_cell", - "proc-macro2", - "quote", - "syn 2.0.96", -] - [[package]] name = "enumflags2" version = "0.7.11" @@ -2022,12 +1909,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "ethnum" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" - [[package]] name = "fallible-iterator" version = "0.2.0" @@ -2046,12 +1927,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" -[[package]] -name = "fast-float" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" - [[package]] name = "fastrand" version = "2.3.0" @@ -2132,12 +2007,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" -[[package]] -name = "foreign_vec" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" - [[package]] name = "form_urlencoded" version = "1.2.1" @@ -2353,10 +2222,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", - "js-sys", "libc", "wasi 0.11.0+wasi-snapshot-preview1", - "wasm-bindgen", ] [[package]] @@ -2407,12 +2274,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "hash_hasher" -version = "2.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" - [[package]] name = "hashbrown" version = "0.12.3" @@ -2430,7 +2291,6 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash 0.8.11", "allocator-api2", - "rayon", ] [[package]] @@ -2457,12 +2317,6 @@ dependencies = [ "unicode-segmentation", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -3204,25 +3058,6 @@ dependencies = [ "hashbrown 0.12.3", ] -[[package]] -name = "lz4" -version = "1.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4" -dependencies = [ - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.11.1+lz4-1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "lz4_flex" version = "0.11.3" @@ -3284,15 +3119,6 @@ dependencies = [ "libc", ] -[[package]] -name = "memmap2" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" -dependencies = [ - "libc", -] - [[package]] name = "memoffset" version = "0.6.5" @@ -3349,28 +3175,6 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" -[[package]] -name = "multiversion" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4851161a11d3ad0bf9402d90ffc3967bf231768bfd7aeb61755ad06dbf1a142" -dependencies = [ - "multiversion-macros", - "target-features", -] - -[[package]] -name = "multiversion-macros" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a74ddee9e0c27d2578323c13905793e91622148f138ba29738f9dddb835e90" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "target-features", -] - [[package]] name = "mysql" version = "23.0.1" @@ -3432,7 +3236,7 @@ dependencies = [ "subprocess", "thiserror", "time", - "uuid 1.12.0", + "uuid 1.12.1", ] [[package]] @@ -3499,24 +3303,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "now" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d89e9874397a1f0a52fc1f197a8effd9735223cb2390e9dcc83ac6cd02923d0" -dependencies = [ - "chrono", -] - -[[package]] -name = "ntapi" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" -dependencies = [ - "winapi", -] - [[package]] name = "num" version = "0.4.3" @@ -3727,20 +3513,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "opentls" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f561874f8d6ecfb674fc08863414040c93cc90c0b6963fe679895fab8b65560" -dependencies = [ - "futures-util", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "url", -] - [[package]] name = "option-ext" version = "0.2.0" @@ -3870,7 +3642,7 @@ dependencies = [ "thrift", "tokio", "twox-hash", - "zstd 0.13.2", + "zstd", "zstd-sys", ] @@ -3986,15 +3758,6 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" -[[package]] -name = "planus" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f" -dependencies = [ - "array-init-cursor", -] - [[package]] name = "plotters" version = "0.3.7" @@ -4023,255 +3786,6 @@ dependencies = [ "plotters-backend", ] -[[package]] -name = "polars" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1362d4a136c0ebacb40d88a37ba361738b222fd8a2ee9340a3d8642f698c52b" -dependencies = [ - "getrandom 0.2.15", - "polars-core", - "polars-io", - "polars-lazy", - "polars-ops", - "polars-sql", - "polars-time", - "version_check", -] - -[[package]] -name = "polars-arrow" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f967c901fa5da4ca7f64e813d1268488ba97e9b3004cefc579ff851c197a1138" -dependencies = [ - "arrow2", - "hashbrown 0.14.5", - "multiversion", - "num-traits", - "polars-error", - "thiserror", - "version_check", -] - -[[package]] -name = "polars-core" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b24f92fc5b167f668ff85ab9607dfa72e2c09664cacef59297ee8601dee60126" -dependencies = [ - "ahash 0.8.11", - "arrow2", - "bitflags 2.8.0", - "chrono", - "comfy-table", - "either", - "hashbrown 0.14.5", - "indexmap 2.7.1", - "num-traits", - "once_cell", - "polars-arrow", - "polars-error", - "polars-row", - "polars-utils", - "rand 0.8.5", - "rand_distr", - "rayon", - "regex", - "smartstring", - "thiserror", - "version_check", - "xxhash-rust", -] - -[[package]] -name = "polars-error" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40d09c3a7337e53b38c37b57999038440fa39c6801b9ba48afaecd8e16f7ac0a" -dependencies = [ - "arrow2", - "regex", - "thiserror", -] - -[[package]] -name = "polars-io" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92cab0df9f2a35702fa5aec99edfaabf9ae8e9cdd0acf69e143ad2d132f34f9c" -dependencies = [ - "ahash 0.8.11", - "arrow2", - "async-trait", - "bytes", - "chrono", - "fast-float", - "futures", - "home", - "lexical", - "lexical-core 0.8.5", - "memchr", - "memmap2 0.7.1", - "num-traits", - "once_cell", - "polars-arrow", - "polars-core", - "polars-error", - "polars-time", - "polars-utils", - "rayon", - "regex", - "simdutf8", - "tokio", -] - -[[package]] -name = "polars-lazy" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c33762ec2a55e01c9f8776b34db86257c70a0a3b3929bd4eb91a52aacf61456" -dependencies = [ - "ahash 0.8.11", - "bitflags 2.8.0", - "glob", - "once_cell", - "polars-arrow", - "polars-core", - "polars-io", - "polars-ops", - "polars-pipe", - "polars-plan", - "polars-time", - "polars-utils", - "rayon", - "smartstring", - "version_check", -] - -[[package]] -name = "polars-ops" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e825575c96302d2daedfc205a0062180033c92c55bcd6aafc4e109d4d8849ed0" -dependencies = [ - "argminmax", - "arrow2", - "either", - "indexmap 2.7.1", - "memchr", - "polars-arrow", - "polars-core", - "polars-utils", - "smartstring", - "version_check", -] - -[[package]] -name = "polars-pipe" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f2bc9a12da9ed043fb0cb51dbcb87b365e4845b7ab6399d7a81e838460c6974" -dependencies = [ - "enum_dispatch", - "hashbrown 0.14.5", - "num-traits", - "polars-arrow", - "polars-core", - "polars-io", - "polars-ops", - "polars-plan", - "polars-row", - "polars-utils", - "rayon", - "smartstring", - "version_check", -] - -[[package]] -name = "polars-plan" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb67b014f0295e8e9dbb84404a91d666d477b3bc248a2ed51bc442833b16da35" -dependencies = [ - "ahash 0.8.11", - "arrow2", - "once_cell", - "polars-arrow", - "polars-core", - "polars-io", - "polars-ops", - "polars-time", - "polars-utils", - "rayon", - "regex", - "smartstring", - "strum_macros 0.25.3", - "version_check", -] - -[[package]] -name = "polars-row" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27f54c1956027bf6301948fb4f2837cf6d6b638d8dd1edf3aaeaa19906a986be" -dependencies = [ - "arrow2", - "polars-error", - "polars-utils", -] - -[[package]] -name = "polars-sql" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbfcb15cf8eebd25ea1724109d0153817cd484c6326290585f0736b4e7fcf2f4" -dependencies = [ - "polars-arrow", - "polars-core", - "polars-lazy", - "polars-plan", - "serde", - "serde_json", - "sqlparser 0.36.1", -] - -[[package]] -name = "polars-time" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f42d2632f5971c9575041d33cbcfb1f996900c40bbf58bc6eb0a0c5efbecea" -dependencies = [ - "arrow2", - "atoi", - "chrono", - "now", - "once_cell", - "polars-arrow", - "polars-core", - "polars-ops", - "polars-utils", - "regex", - "smartstring", -] - -[[package]] -name = "polars-utils" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c326708a370d71dc6e11a8f4bbc10a8479e1c314dc048ba73543b815cd0bf339" -dependencies = [ - "ahash 0.8.11", - "hashbrown 0.14.5", - "num-traits", - "once_cell", - "polars-error", - "rayon", - "smartstring", - "sysinfo", - "version_check", -] - [[package]] name = "portable-atomic" version = "1.10.0" @@ -4496,7 +4010,7 @@ dependencies = [ "thiserror", "tokio", "urlencoding", - "uuid 1.12.0", + "uuid 1.12.1", ] [[package]] @@ -4675,7 +4189,7 @@ checksum = "4dc290b669d30e20751e813517bbe13662d020419c5c8818ff10b6e8bb7777f6" dependencies = [ "r2d2", "rusqlite", - "uuid 1.12.0", + "uuid 1.12.1", ] [[package]] @@ -4746,16 +4260,6 @@ dependencies = [ "getrandom 0.2.15", ] -[[package]] -name = "rand_distr" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" -dependencies = [ - "num-traits", - "rand 0.8.5", -] - [[package]] name = "rand_hc" version = "0.2.0" @@ -4849,7 +4353,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax 0.8.5", + "regex-syntax", ] [[package]] @@ -4860,15 +4364,9 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.5", + "regex-syntax", ] -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - [[package]] name = "regex-syntax" version = "0.8.5" @@ -4979,7 +4477,7 @@ dependencies = [ "rkyv_derive", "seahash", "tinyvec", - "uuid 1.12.0", + "uuid 1.12.1", ] [[package]] @@ -5058,9 +4556,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.43" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ "bitflags 2.8.0", "errno", @@ -5356,17 +4854,6 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" -[[package]] -name = "smartstring" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" -dependencies = [ - "autocfg", - "static_assertions", - "version_check", -] - [[package]] name = "snafu" version = "0.8.5" @@ -5426,15 +4913,6 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -[[package]] -name = "sqlparser" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" -dependencies = [ - "log", -] - [[package]] name = "sqlparser" version = "0.37.0" @@ -5496,12 +4974,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" -[[package]] -name = "strength_reduce" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" - [[package]] name = "stringprep" version = "0.1.5" @@ -5531,19 +5003,6 @@ version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -[[package]] -name = "strum_macros" -version = "0.25.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.96", -] - [[package]] name = "strum_macros" version = "0.26.4" @@ -5580,7 +5039,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f551f902d5642e58039aee6a9021a61037926af96e071816361644983966f540" dependencies = [ "debugid", - "memmap2 0.5.10", + "memmap2", "stable_deref_trait", "uuid 0.8.2", ] @@ -5635,20 +5094,6 @@ dependencies = [ "syn 2.0.96", ] -[[package]] -name = "sysinfo" -version = "0.29.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666" -dependencies = [ - "cfg-if", - "core-foundation-sys", - "libc", - "ntapi", - "once_cell", - "winapi", -] - [[package]] name = "system-configuration" version = "0.5.1" @@ -5676,12 +5121,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" -[[package]] -name = "target-features" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" - [[package]] name = "target-lexicon" version = "0.12.16" @@ -5753,9 +5192,9 @@ dependencies = [ [[package]] name = "tiberius" -version = "0.7.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "833311bc8e26e96c73ad1b5c1f488c588808c747a318905ec67e43d422ea2c08" +checksum = "31d6bfb7b1de4275b4cf566bad8d0c133d800e3d8b35d256407371fab49cfed6" dependencies = [ "async-native-tls", "async-trait", @@ -5772,7 +5211,6 @@ dependencies = [ "libgssapi", "num-traits", "once_cell", - "opentls", "pin-project-lite", "pretty-hex", "rust_decimal", @@ -6184,9 +5622,9 @@ checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" [[package]] name = "uuid" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4" +checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" dependencies = [ "getrandom 0.2.15", "rand 0.8.5", @@ -6619,12 +6057,6 @@ dependencies = [ "tap", ] -[[package]] -name = "xxhash-rust" -version = "0.8.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" - [[package]] name = "xz2" version = "0.1.7" @@ -6749,32 +6181,13 @@ dependencies = [ "syn 2.0.96", ] -[[package]] -name = "zstd" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" -dependencies = [ - "zstd-safe 6.0.6", -] - [[package]] name = "zstd" version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" dependencies = [ - "zstd-safe 7.2.1", -] - -[[package]] -name = "zstd-safe" -version = "6.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" -dependencies = [ - "libc", - "zstd-sys", + "zstd-safe", ] [[package]] diff --git a/connectorx-python/Cargo.toml b/connectorx-python/Cargo.toml index 10bbedb901..80ad864584 100644 --- a/connectorx-python/Cargo.toml +++ b/connectorx-python/Cargo.toml @@ -14,7 +14,6 @@ readme = "README.md" [dependencies] anyhow = "1" arrow = { version = "53" } -arrow2 = {version = "0.17", default-features = false} bitfield = "0.13" bytes = "1.4" chrono = "0.4" @@ -65,7 +64,7 @@ name = "connectorx" [features] branch = ["connectorx/branch"] default = ["extension", "fptr", "nbstr", "dsts", "srcs", "federation", "fed_exec"] -dsts = ["connectorx/dst_arrow", "connectorx/dst_arrow2"] +dsts = ["connectorx/dst_arrow"] executable = ["pyo3/auto-initialize"] extension = ["pyo3/extension-module"] fptr = ["connectorx/fptr"] diff --git a/connectorx-python/connectorx/__init__.py b/connectorx-python/connectorx/__init__.py index ea5bcb4d1a..737a1de57c 100644 --- a/connectorx-python/connectorx/__init__.py +++ b/connectorx-python/connectorx/__init__.py @@ -196,7 +196,7 @@ def read_sql( conn: str | ConnectionUrl | dict[str, str] | dict[str, ConnectionUrl], query: list[str] | str, *, - return_type: Literal["arrow", "arrow2"], + return_type: Literal["arrow"], protocol: Protocol | None = None, partition_on: str | None = None, partition_range: tuple[int, int] | None = None, @@ -238,7 +238,7 @@ def read_sql( conn: str | ConnectionUrl | dict[str, str] | dict[str, ConnectionUrl], query: list[str] | str, *, - return_type: Literal["polars", "polars2"], + return_type: Literal["polars"], protocol: Protocol | None = None, partition_on: str | None = None, partition_range: tuple[int, int] | None = None, @@ -252,7 +252,7 @@ def read_sql( query: list[str] | str, *, return_type: Literal[ - "pandas", "polars", "polars2", "arrow", "arrow2", "modin", "dask" + "pandas", "polars", "arrow", "modin", "dask" ] = "pandas", protocol: Protocol | None = None, partition_on: str | None = None, @@ -383,18 +383,18 @@ def read_sql( dd = try_import_module("dask.dataframe") df = dd.from_pandas(df, npartitions=1) - elif return_type in {"arrow", "arrow2", "polars", "polars2"}: + elif return_type in {"arrow", "polars"}: try_import_module("pyarrow") result = _read_sql( conn, - "arrow2" if return_type in {"arrow2", "polars2"} else "arrow", + "arrow", queries=queries, protocol=protocol, partition_query=partition_query, ) df = reconstruct_arrow(result) - if return_type in {"polars", "polars2"}: + if return_type in {"polars"}: pl = try_import_module("polars") try: df = pl.from_arrow(df) diff --git a/connectorx-python/connectorx/connectorx.pyi b/connectorx-python/connectorx/connectorx.pyi index c1ccbfa530..f116a97fa9 100644 --- a/connectorx-python/connectorx/connectorx.pyi +++ b/connectorx-python/connectorx/connectorx.pyi @@ -29,7 +29,7 @@ def read_sql( @overload def read_sql( conn: str, - return_type: Literal["arrow", "arrow2"], + return_type: Literal["arrow"], protocol: str | None, queries: list[str] | None, partition_query: dict[str, Any] | None, diff --git a/connectorx-python/connectorx/tests/test_arrow.py b/connectorx-python/connectorx/tests/test_arrow.py index 458f41d1c2..8789c3e1c1 100644 --- a/connectorx-python/connectorx/tests/test_arrow.py +++ b/connectorx-python/connectorx/tests/test_arrow.py @@ -43,36 +43,6 @@ def test_arrow(postgres_url: str) -> None: df.sort_values(by="test_int", inplace=True, ignore_index=True) assert_frame_equal(df, expected, check_names=True) - -def test_arrow2(postgres_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - return_type="arrow2", - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="int32"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="float64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="object" - ), - }, - ) - - df = df.to_pandas() - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - def test_arrow_type(postgres_url: str) -> None: query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_bytea, test_json, test_jsonb, test_ltree, test_name FROM test_types" df = read_sql(postgres_url, query, return_type="arrow") @@ -167,121 +137,4 @@ def test_arrow_type(postgres_url: str) -> None: }, ) - assert_frame_equal(df, expected, check_names=True) -def test_arrow2_type(postgres_url: str) -> None: - query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_bytea, test_json, test_jsonb, test_f4array, test_f8array, test_narray, test_i2array, test_i4array, test_i8array, test_enum, test_ltree, test_name FROM test_types" - df = read_sql(postgres_url, query, return_type="arrow2") - df = df.to_pandas(date_as_object=False) - df.sort_values(by="test_int16", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(4), - data={ - "test_date": pd.Series( - ["1970-01-01", "2000-02-28", "2038-01-18", None], dtype="datetime64[ms]" - ), - "test_timestamp": pd.Series( - [ - "1970-01-01 00:00:01", - "2000-02-28 12:00:10", - "2038-01-18 23:59:59", - None, - ], - dtype="datetime64[us]", - ), - "test_timestamptz": pd.Series( - [ - "1970-01-01 00:00:01+00:00", - "2000-02-28 16:00:10+00:00", - "2038-01-18 15:59:59+00:00", - None, - ], - dtype="datetime64[us, UTC]", - ), - "test_int16": pd.Series([0, 1, 2, 3], dtype="int32"), - "test_int64": pd.Series( - [-9223372036854775808, 0, 9223372036854775807, None], dtype="float64" - ), - "test_float32": pd.Series( - [None, 3.1415926535, 2.71, -1e-37], dtype="float32" - ), - "test_numeric": pd.Series([None, 521.34, 0.00, 0.00], dtype="float64"), - "test_bpchar": pd.Series(["a ", "bb ", "ccc ", None], dtype="object"), - "test_char": pd.Series(["a", "b", None, "d"], dtype="object"), - "test_varchar": pd.Series([None, "bb", "c", "defghijklm"], dtype="object"), - "test_uuid": pd.Series( - [ - "86b494cc-96b2-11eb-9298-3e22fbb9fe9d", - "86b49b84-96b2-11eb-9298-3e22fbb9fe9d", - "86b49c42-96b2-11eb-9298-3e22fbb9fe9d", - None, - ], - dtype="object", - ), - "test_time": pd.Series( - [ - datetime.time(8, 12, 40), - None, - datetime.time(23, 0, 10), - datetime.time(18, 30), - ], - dtype="object", - ), - "test_bytea": pd.Series( - [ - None, - b"\xd0\x97\xd0\xb4\xd1\x80\xd0\xb0\xcc\x81\xd0\xb2\xd1\x81\xd1\x82\xd0\xb2\xd1\x83\xd0\xb9\xd1\x82\xd0\xb5", - b"", - b"\xf0\x9f\x98\x9c", - ], - dtype="object", - ), - "test_json": pd.Series( - [ - '{"customer":"John Doe","items":{"product":"Beer","qty":6}}', - '{"customer":"Lily Bush","items":{"product":"Diaper","qty":24}}', - '{"customer":"Josh William","items":{"product":"Toy Car","qty":1}}', - None, - ], - dtype="object", - ), - "test_jsonb": pd.Series( - [ - '{"product":"Beer","qty":6}', - '{"product":"Diaper","qty":24}', - '{"product":"Toy Car","qty":1}', - None, - ], - dtype="object", - ), - "test_f4array": pd.Series( - [[], None, [123.123], [-1e-37, 1e37]], dtype="object" - ), - "test_f8array": pd.Series( - [[], None, [1e-307, 1e308], [0.000234, -12.987654321]], dtype="object" - ), - "test_narray": pd.Series( - [[], None, [521.34], [0.12, 333.33, 22.22]], dtype="object" - ), - "test_i2array": pd.Series( - [[-1, 0, 1], [], [-32768, 32767], None], dtype="object" - ), - "test_i4array": pd.Series( - [[-1, 0, 1123], [], [-2147483648, 2147483647], None], dtype="object" - ), - "test_i8array": pd.Series( - [[-9223372036854775808, 9223372036854775807], [], [0], None], - dtype="object", - ), - "test_enum": pd.Series( - ["happy", "very happy", "ecstatic", None], dtype="object" - ), - "test_ltree": pd.Series( - ["A.B.C.D", "A.B.E", "A", None], dtype="object" - ), - "test_name": pd.Series( - ["0", "21", "someName", "101203203-1212323-22131235"] - ) - - }, - ) assert_frame_equal(df, expected, check_names=True) \ No newline at end of file diff --git a/connectorx-python/src/arrow2.rs b/connectorx-python/src/arrow2.rs deleted file mode 100644 index 2a70a3e235..0000000000 --- a/connectorx-python/src/arrow2.rs +++ /dev/null @@ -1,61 +0,0 @@ -use crate::errors::ConnectorXPythonError; -use arrow2::{ - array::Array, - chunk::Chunk, - datatypes::{Field, Schema}, - ffi, -}; -use connectorx::source_router::SourceConn; -use connectorx::{prelude::*, sql::CXQuery}; -use fehler::throws; -use libc::uintptr_t; -use pyo3::prelude::*; -use pyo3::{PyAny, Python}; -use std::sync::Arc; - -#[throws(ConnectorXPythonError)] -pub fn write_arrow<'py>( - py: Python<'py>, - source_conn: &SourceConn, - origin_query: Option, - queries: &[CXQuery], -) -> Bound<'py, PyAny> { - let ptrs = py.allow_threads( - || -> Result<(Vec, Vec>), ConnectorXPythonError> { - let destination = get_arrow2(source_conn, origin_query, queries)?; - let (rbs, schema) = destination.arrow()?; - Ok(to_ptrs(rbs, schema)) - }, - )?; - let obj: PyObject = ptrs.into_py(py); - obj.into_bound(py) -} - -fn to_ptrs( - rbs: Vec>>, - schema: Arc, -) -> (Vec, Vec>) { - if rbs.is_empty() { - return (vec![], vec![]); - } - - let mut result = vec![]; - let names = schema.fields.iter().map(|f| f.name.clone()).collect(); - - for rb in rbs.into_iter() { - let mut cols = vec![]; - - for array in rb.into_arrays() { - let schema_ptr = - ffi::export_field_to_c(&Field::new("", array.data_type().clone(), true)); - let array_ptr = ffi::export_array_to_c(array); - let array_ptr = Box::into_raw(Box::new(array_ptr)); - let schema_ptr = Box::into_raw(Box::new(schema_ptr)); - - cols.push((array_ptr as uintptr_t, schema_ptr as uintptr_t)); - } - - result.push(cols); - } - (names, result) -} diff --git a/connectorx-python/src/cx_read_sql.rs b/connectorx-python/src/cx_read_sql.rs index c6416ceec3..2a1a457998 100644 --- a/connectorx-python/src/cx_read_sql.rs +++ b/connectorx-python/src/cx_read_sql.rs @@ -69,12 +69,6 @@ pub fn read_sql<'py>( origin_query, &queries, )?), - "arrow2" => Ok(crate::arrow2::write_arrow( - py, - &source_conn, - origin_query, - &queries, - )?), _ => Err(PyValueError::new_err(format!( "return type should be 'pandas' or 'arrow', got '{}'", return_type diff --git a/connectorx-python/src/errors.rs b/connectorx-python/src/errors.rs index 929023e057..f04fbc2a32 100644 --- a/connectorx-python/src/errors.rs +++ b/connectorx-python/src/errors.rs @@ -48,9 +48,6 @@ pub enum ConnectorXPythonError { #[error(transparent)] ArrowDestinationError(#[from] connectorx::destinations::arrow::ArrowDestinationError), - #[error(transparent)] - Arrow2DestinationError(#[from] connectorx::destinations::arrow2::Arrow2DestinationError), - /// Any other errors that are too trivial to be put here explicitly. #[error(transparent)] Other(#[from] anyhow::Error), diff --git a/connectorx-python/src/lib.rs b/connectorx-python/src/lib.rs index 4f27cc30d4..2ef567b4fc 100644 --- a/connectorx-python/src/lib.rs +++ b/connectorx-python/src/lib.rs @@ -1,5 +1,4 @@ pub mod arrow; -pub mod arrow2; pub mod constants; pub mod cx_read_sql; mod errors; diff --git a/connectorx/Cargo.toml b/connectorx/Cargo.toml index e58427936d..d0e81a0926 100644 --- a/connectorx/Cargo.toml +++ b/connectorx/Cargo.toml @@ -23,10 +23,8 @@ serde_json = "1" chrono = "0.4" arrow = {workspace = true, optional = true} -arrow2 = {workspace = true, default-features = false, optional = true} polars = {workspace = true, optional = true, features=["dtype-u8", "dtype-u16", "lazy"]} polars-arrow = {workspace = true, optional = true} -polars_old = { package = "polars", version = "0.32", optional = true, features=["dtype-u8", "dtype-u16"]} bb8 = {version = "0.8", optional = true} bb8-tiberius = {version = "0.11", optional = true} csv = {version = "1", optional = true} @@ -72,11 +70,10 @@ iai = "0.1" pprof = {version = "0.5", features = ["flamegraph"]} [features] -all = ["src_sqlite", "src_postgres", "src_mysql", "src_mssql", "src_oracle", "src_bigquery", "src_csv", "src_dummy", "src_trino", "dst_arrow", "dst_arrow2", "dst_polars", "federation", "fed_exec"] +all = ["src_sqlite", "src_postgres", "src_mysql", "src_mssql", "src_oracle", "src_bigquery", "src_csv", "src_dummy", "src_trino", "dst_arrow", "dst_polars", "federation", "fed_exec"] branch = [] default = ["fptr"] dst_arrow = ["arrow"] -dst_arrow2 = ["polars_old", "arrow2"] dst_polars = ["dst_arrow", "polars", "polars-arrow"] fptr = [] src_bigquery = ["gcp-bigquery-client", "tokio"] diff --git a/connectorx/src/constants.rs b/connectorx/src/constants.rs index b65cca47d1..bcdd779089 100644 --- a/connectorx/src/constants.rs +++ b/connectorx/src/constants.rs @@ -1,10 +1,10 @@ -#[cfg(any(feature = "dst_arrow", feature = "dst_arrow2"))] +#[cfg(feature = "dst_arrow")] pub(crate) const SECONDS_IN_DAY: i64 = 86_400; #[allow(dead_code)] const KILO: usize = 1 << 10; -#[cfg(any(feature = "dst_arrow", feature = "dst_arrow2"))] +#[cfg(feature = "dst_arrow")] pub const RECORD_BATCH_SIZE: usize = 64 * KILO; #[cfg(any( diff --git a/connectorx/src/destinations/arrow2/arrow_assoc.rs b/connectorx/src/destinations/arrow2/arrow_assoc.rs deleted file mode 100644 index 22b9b27dfc..0000000000 --- a/connectorx/src/destinations/arrow2/arrow_assoc.rs +++ /dev/null @@ -1,600 +0,0 @@ -use super::typesystem::{DateTimeWrapperMicro, NaiveDateTimeWrapperMicro, NaiveTimeWrapperMicro}; -use arrow2::{ - array::*, - datatypes::{DataType as ArrowDataType, Field, TimeUnit}, -}; -use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc}; - -use crate::constants::SECONDS_IN_DAY; - -/// Associate arrow builder with native type -pub trait ArrowAssoc { - type Builder: MutableArray + 'static + Send; - - fn builder(nrows: usize) -> Self::Builder; - fn push(builder: &mut Self::Builder, value: Self); - fn field(header: &str) -> Field; -} - -macro_rules! impl_arrow_assoc { - ($T:ty, $AT:expr, $B:ty) => { - impl ArrowAssoc for $T { - type Builder = $B; - - fn builder(nrows: usize) -> Self::Builder { - Self::Builder::with_capacity(nrows) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Self) { - builder.push(Some(value)); - } - - fn field(header: &str) -> Field { - Field::new(header, $AT, false) - } - } - - impl ArrowAssoc for Option<$T> { - type Builder = $B; - - fn builder(nrows: usize) -> Self::Builder { - Self::Builder::with_capacity(nrows) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Self) { - builder.push(value); - } - - fn field(header: &str) -> Field { - Field::new(header, $AT, true) - } - } - }; -} - -impl_arrow_assoc!(u32, ArrowDataType::UInt32, MutablePrimitiveArray); -impl_arrow_assoc!(u64, ArrowDataType::UInt64, MutablePrimitiveArray); -impl_arrow_assoc!(i32, ArrowDataType::Int32, MutablePrimitiveArray); -impl_arrow_assoc!(i64, ArrowDataType::Int64, MutablePrimitiveArray); -impl_arrow_assoc!(f32, ArrowDataType::Float32, MutablePrimitiveArray); -impl_arrow_assoc!(f64, ArrowDataType::Float64, MutablePrimitiveArray); -impl_arrow_assoc!(bool, ArrowDataType::Boolean, MutableBooleanArray); - -macro_rules! impl_arrow_assoc_vec { - ($T:ty, $PT:ty, $AT:expr) => { - impl ArrowAssoc for Vec<$T> { - type Builder = MutableListArray; - - fn builder(nrows: usize) -> Self::Builder { - MutableListArray::::with_capacity(nrows) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Self) { - let val: Vec> = value.into_iter().map(|v| Some(v)).collect(); - builder.try_push(Some(val)).unwrap(); - } - - fn field(header: &str) -> Field { - Field::new( - header, - ArrowDataType::LargeList(Box::new(Field::new("", $AT, false))), - false, - ) - } - } - - impl ArrowAssoc for Option> { - type Builder = MutableListArray; - - fn builder(nrows: usize) -> Self::Builder { - MutableListArray::::with_capacity(nrows) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Self) { - match value { - Some(values) => { - let val: Vec> = values.into_iter().map(|v| Some(v)).collect(); - builder.try_push(Some(val)).unwrap(); - } - None => builder.push_null(), - } - } - - fn field(header: &str) -> Field { - Field::new( - header, - ArrowDataType::LargeList(Box::new(Field::new("", $AT, false))), - true, - ) - } - } - }; -} - -macro_rules! impl_arrow_assoc_primitive_vec { - ($T:ty, $AT:expr) => { - impl_arrow_assoc_vec!($T, MutablePrimitiveArray<$T>, $AT); - }; -} - -impl_arrow_assoc_vec!(bool, MutableBooleanArray, ArrowDataType::Boolean); -impl_arrow_assoc_primitive_vec!(i32, ArrowDataType::Int32); -impl_arrow_assoc_primitive_vec!(i64, ArrowDataType::Int64); -impl_arrow_assoc_primitive_vec!(u32, ArrowDataType::UInt32); -impl_arrow_assoc_primitive_vec!(u64, ArrowDataType::UInt64); -impl_arrow_assoc_primitive_vec!(f32, ArrowDataType::Float32); -impl_arrow_assoc_primitive_vec!(f64, ArrowDataType::Float64); - -impl ArrowAssoc for &str { - type Builder = MutableUtf8Array; - - fn builder(nrows: usize) -> Self::Builder { - MutableUtf8Array::::with_capacity(nrows) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Self) { - builder.push(Some(value)); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::LargeUtf8, false) - } -} - -impl ArrowAssoc for Option<&str> { - type Builder = MutableUtf8Array; - - fn builder(nrows: usize) -> Self::Builder { - MutableUtf8Array::::with_capacity(nrows) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Self) { - builder.push(value); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::LargeUtf8, true) - } -} - -impl ArrowAssoc for String { - type Builder = MutableUtf8Array; - - fn builder(nrows: usize) -> Self::Builder { - MutableUtf8Array::::with_capacity(nrows) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: String) { - builder.push(Some(value)); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::LargeUtf8, false) - } -} - -impl ArrowAssoc for Option { - type Builder = MutableUtf8Array; - - fn builder(nrows: usize) -> Self::Builder { - MutableUtf8Array::with_capacity(nrows) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Self) { - builder.push(value); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::LargeUtf8, true) - } -} - -impl ArrowAssoc for DateTime { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - MutablePrimitiveArray::with_capacity(nrows).to(ArrowDataType::Timestamp( - TimeUnit::Nanosecond, - Some("UTC".to_string()), - )) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: DateTime) { - builder.push(Some(value).map(|x| { - x.timestamp_nanos_opt() - .unwrap_or_else(|| panic!("out of range DateTime")) - })); - } - - fn field(header: &str) -> Field { - Field::new( - header, - ArrowDataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_string())), - true, - ) - } -} - -impl ArrowAssoc for Option> { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - MutablePrimitiveArray::with_capacity(nrows).to(ArrowDataType::Timestamp( - TimeUnit::Nanosecond, - Some("UTC".to_string()), - )) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Option>) { - builder.push(value.map(|x| { - x.timestamp_nanos_opt() - .unwrap_or_else(|| panic!("out of range DateTime")) - })); - } - - fn field(header: &str) -> Field { - Field::new( - header, - ArrowDataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_string())), - false, - ) - } -} - -impl ArrowAssoc for DateTimeWrapperMicro { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - MutablePrimitiveArray::with_capacity(nrows).to(ArrowDataType::Timestamp( - TimeUnit::Microsecond, - Some("UTC".to_string()), - )) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: DateTimeWrapperMicro) { - builder.push(Some(value).map(|x| x.0.timestamp_micros())); - } - - fn field(header: &str) -> Field { - Field::new( - header, - ArrowDataType::Timestamp(TimeUnit::Microsecond, Some("UTC".to_string())), - true, - ) - } -} - -impl ArrowAssoc for Option { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - MutablePrimitiveArray::with_capacity(nrows).to(ArrowDataType::Timestamp( - TimeUnit::Microsecond, - Some("UTC".to_string()), - )) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Option) { - builder.push(value.map(|x| x.0.timestamp_micros())); - } - - fn field(header: &str) -> Field { - Field::new( - header, - ArrowDataType::Timestamp(TimeUnit::Microsecond, Some("UTC".to_string())), - false, - ) - } -} - -fn naive_date_to_date32(nd: NaiveDate) -> i32 { - match nd.and_hms_opt(0, 0, 0) { - Some(dt) => (dt.and_utc().timestamp() / SECONDS_IN_DAY) as i32, - None => panic!("and_hms_opt got None from {:?}", nd), - } -} - -fn naive_time_to_time64_micros(nd: NaiveTime) -> i64 { - nd.num_seconds_from_midnight() as i64 * 1_000_000 + (nd.nanosecond() as i64 / 1000) -} - -fn naive_time_to_time64_nanos(nd: NaiveTime) -> i64 { - nd.num_seconds_from_midnight() as i64 * 1_000_000_000 + nd.nanosecond() as i64 -} - -impl ArrowAssoc for Option { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - MutablePrimitiveArray::with_capacity(nrows).to(ArrowDataType::Date32) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Option) { - builder.push(value.map(naive_date_to_date32)); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::Date32, true) - } -} - -impl ArrowAssoc for NaiveDate { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - MutablePrimitiveArray::with_capacity(nrows).to(ArrowDataType::Date32) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: NaiveDate) { - builder.push(Some(naive_date_to_date32(value))); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::Date32, false) - } -} - -impl ArrowAssoc for Option { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - // naive => None - MutablePrimitiveArray::with_capacity(nrows) - .to(ArrowDataType::Timestamp(TimeUnit::Microsecond, None)) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Option) { - builder.push(value.map(|x| x.0.and_utc().timestamp_micros())); - } - - fn field(header: &str) -> Field { - // naive => None - Field::new( - header, - ArrowDataType::Timestamp(TimeUnit::Microsecond, None), - true, - ) - } -} - -impl ArrowAssoc for NaiveDateTimeWrapperMicro { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - // naive => None - MutablePrimitiveArray::with_capacity(nrows) - .to(ArrowDataType::Timestamp(TimeUnit::Microsecond, None)) - } - - fn push(builder: &mut Self::Builder, value: NaiveDateTimeWrapperMicro) { - builder.push(Some(value).map(|x| x.0.and_utc().timestamp_micros())); - } - - fn field(header: &str) -> Field { - // naive => None - Field::new( - header, - ArrowDataType::Timestamp(TimeUnit::Microsecond, None), - true, - ) - } -} - -impl ArrowAssoc for Option { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - // naive => None - MutablePrimitiveArray::with_capacity(nrows) - .to(ArrowDataType::Timestamp(TimeUnit::Nanosecond, None)) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Option) { - builder.push(value.map(|x| { - x.and_utc() - .timestamp_nanos_opt() - .unwrap_or_else(|| panic!("out of range DateTime")) - })); - } - - fn field(header: &str) -> Field { - // naive => None - Field::new( - header, - ArrowDataType::Timestamp(TimeUnit::Nanosecond, None), - true, - ) - } -} - -impl ArrowAssoc for NaiveDateTime { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - // naive => None - MutablePrimitiveArray::with_capacity(nrows) - .to(ArrowDataType::Timestamp(TimeUnit::Nanosecond, None)) - } - - fn push(builder: &mut Self::Builder, value: NaiveDateTime) { - builder.push(Some(value).map(|x| { - x.and_utc() - .timestamp_nanos_opt() - .unwrap_or_else(|| panic!("out of range DateTime")) - })); - } - - fn field(header: &str) -> Field { - // naive => None - Field::new( - header, - ArrowDataType::Timestamp(TimeUnit::Nanosecond, None), - true, - ) - } -} - -impl ArrowAssoc for Option { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - MutablePrimitiveArray::with_capacity(nrows).to(ArrowDataType::Time64(TimeUnit::Microsecond)) - } - - fn push(builder: &mut Self::Builder, value: Option) { - builder.push(match value { - Some(val) => Some(naive_time_to_time64_micros(val.0)), - None => None, - }); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::Time64(TimeUnit::Microsecond), true) - } -} - -impl ArrowAssoc for NaiveTimeWrapperMicro { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - MutablePrimitiveArray::with_capacity(nrows).to(ArrowDataType::Time64(TimeUnit::Microsecond)) - } - - fn push(builder: &mut Self::Builder, value: NaiveTimeWrapperMicro) { - builder.push(Some(value.0).map(naive_time_to_time64_nanos)); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::Time64(TimeUnit::Microsecond), false) - } -} - -impl ArrowAssoc for Option { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - MutablePrimitiveArray::with_capacity(nrows).to(ArrowDataType::Time64(TimeUnit::Nanosecond)) - } - - fn push(builder: &mut Self::Builder, value: Option) { - builder.push(value.map(naive_time_to_time64_nanos)); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::Time64(TimeUnit::Nanosecond), true) - } -} - -impl ArrowAssoc for NaiveTime { - type Builder = MutablePrimitiveArray; - - fn builder(nrows: usize) -> Self::Builder { - MutablePrimitiveArray::with_capacity(nrows).to(ArrowDataType::Time64(TimeUnit::Nanosecond)) - } - - fn push(builder: &mut Self::Builder, value: NaiveTime) { - builder.push(Some(value).map(naive_time_to_time64_nanos)); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::Time64(TimeUnit::Nanosecond), false) - } -} - -impl ArrowAssoc for Option> { - type Builder = MutableBinaryArray; - - fn builder(nrows: usize) -> Self::Builder { - MutableBinaryArray::with_capacity(nrows) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Self) { - builder.push(value); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::LargeBinary, true) - } -} - -impl ArrowAssoc for Vec { - type Builder = MutableBinaryArray; - - fn builder(nrows: usize) -> Self::Builder { - MutableBinaryArray::with_capacity(nrows) - } - - #[inline] - fn push(builder: &mut Self::Builder, value: Self) { - builder.push(Some(value)); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::LargeBinary, false) - } -} - -impl ArrowAssoc for Option> { - type Builder = MutableListArray>; - - fn builder(nrows: usize) -> Self::Builder { - MutableListArray::with_capacity(nrows) - } - - fn push(builder: &mut Self::Builder, value: Self) { - let mut string_array: Vec> = vec![]; - match value { - Some(value) => { - for sub_value in value { - string_array.push(Some(sub_value)) - } - - builder.try_push(Some(string_array)).unwrap(); - } - None => { - builder.try_push(Some(string_array)).unwrap(); - } - }; - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::LargeUtf8, true) - } -} - -impl ArrowAssoc for Vec { - type Builder = MutableListArray>; - - fn builder(nrows: usize) -> Self::Builder { - MutableListArray::with_capacity(nrows) - } - - fn push(builder: &mut Self::Builder, value: Self) { - let mut string_array: Vec> = vec![]; - for sub_value in value { - string_array.push(Some(sub_value)) - } - builder.try_push(Some(string_array)).unwrap(); - } - - fn field(header: &str) -> Field { - Field::new(header, ArrowDataType::LargeUtf8, false) - } -} diff --git a/connectorx/src/destinations/arrow2/errors.rs b/connectorx/src/destinations/arrow2/errors.rs deleted file mode 100644 index 36a76741ab..0000000000 --- a/connectorx/src/destinations/arrow2/errors.rs +++ /dev/null @@ -1,19 +0,0 @@ -use thiserror::Error; - -pub type Result = std::result::Result; - -#[derive(Error, Debug)] -pub enum Arrow2DestinationError { - #[error(transparent)] - ArrowError(#[from] arrow2::error::Error), - - #[error(transparent)] - PolarsError(#[from] polars_old::error::PolarsError), - - #[error(transparent)] - ConnectorXError(#[from] crate::errors::ConnectorXError), - - /// Any other errors that are too trivial to be put here explicitly. - #[error(transparent)] - Other(#[from] anyhow::Error), -} diff --git a/connectorx/src/destinations/arrow2/funcs.rs b/connectorx/src/destinations/arrow2/funcs.rs deleted file mode 100644 index 666f7bdee7..0000000000 --- a/connectorx/src/destinations/arrow2/funcs.rs +++ /dev/null @@ -1,76 +0,0 @@ -use super::arrow_assoc::ArrowAssoc; -use super::Builder; -use crate::errors::Result; -use crate::typesystem::{ParameterizedFunc, ParameterizedOn}; -use anyhow::anyhow; -use arrow2::array::{Array, MutableArray}; -use arrow2::datatypes::Field; - -pub struct FNewBuilder; - -impl ParameterizedFunc for FNewBuilder { - type Function = fn(nrows: usize) -> Builder; -} - -impl ParameterizedOn for FNewBuilder -where - T: ArrowAssoc, -{ - fn parameterize() -> Self::Function { - fn imp(nrows: usize) -> Builder - where - T: ArrowAssoc, - { - Box::new(T::builder(nrows)) as Builder - } - imp:: - } -} - -pub struct FFinishBuilder; - -impl ParameterizedFunc for FFinishBuilder { - type Function = fn(Builder) -> Result>; -} - -impl ParameterizedOn for FFinishBuilder -where - T: ArrowAssoc, -{ - fn parameterize() -> Self::Function { - fn imp(mut builder: Builder) -> Result> - where - T: ArrowAssoc, - { - builder.shrink_to_fit(); - Ok(MutableArray::as_box( - builder - .as_mut_any() - .downcast_mut::() - .ok_or_else(|| anyhow!("cannot cast arrow builder for finish"))?, - )) - } - imp:: - } -} - -pub struct FNewField; - -impl ParameterizedFunc for FNewField { - type Function = fn(header: &str) -> Field; -} - -impl ParameterizedOn for FNewField -where - T: ArrowAssoc, -{ - fn parameterize() -> Self::Function { - fn imp(header: &str) -> Field - where - T: ArrowAssoc, - { - T::field(header) - } - imp:: - } -} diff --git a/connectorx/src/destinations/arrow2/mod.rs b/connectorx/src/destinations/arrow2/mod.rs deleted file mode 100644 index d51d990c0a..0000000000 --- a/connectorx/src/destinations/arrow2/mod.rs +++ /dev/null @@ -1,284 +0,0 @@ -//! Destination implementation for Arrow2. - -mod arrow_assoc; -mod errors; -mod funcs; -pub mod typesystem; - -use super::{Consume, Destination, DestinationPartition}; -use crate::constants::RECORD_BATCH_SIZE; -use crate::data_order::DataOrder; -use crate::typesystem::{Realize, TypeAssoc, TypeSystem}; -use anyhow::anyhow; -use arrow2::array::{Array, MutableArray}; -use arrow2::chunk::Chunk; -use arrow2::datatypes::{Field, Schema}; -use arrow_assoc::ArrowAssoc; -pub use errors::{Arrow2DestinationError, Result}; -use fehler::throw; -use fehler::throws; -use funcs::{FFinishBuilder, FNewBuilder, FNewField}; -use polars_old::prelude::{DataFrame, PolarsError, Series}; -use std::convert::TryFrom; -use std::sync::{Arc, Mutex}; -pub use typesystem::Arrow2TypeSystem; - -type Builder = Box; -type Builders = Vec; -type ChunkBuffer = Arc>>>>; - -pub struct Arrow2Destination { - schema: Vec, - names: Vec, - data: ChunkBuffer, - arrow_schema: Arc, -} - -impl Default for Arrow2Destination { - fn default() -> Self { - Arrow2Destination { - schema: vec![], - names: vec![], - data: Arc::new(Mutex::new(vec![])), - arrow_schema: Arc::new(Schema::default()), - } - } -} - -impl Arrow2Destination { - pub fn new() -> Self { - Self::default() - } -} - -impl Destination for Arrow2Destination { - const DATA_ORDERS: &'static [DataOrder] = &[DataOrder::ColumnMajor, DataOrder::RowMajor]; - type TypeSystem = Arrow2TypeSystem; - type Partition<'a> = ArrowPartitionWriter; - type Error = Arrow2DestinationError; - - fn needs_count(&self) -> bool { - false - } - - #[throws(Arrow2DestinationError)] - fn allocate>( - &mut self, - _nrows: usize, - names: &[S], - schema: &[Arrow2TypeSystem], - data_order: DataOrder, - ) { - // todo: support colmajor - if !matches!(data_order, DataOrder::RowMajor) { - throw!(crate::errors::ConnectorXError::UnsupportedDataOrder( - data_order - )) - } - - // parse the metadata - self.schema = schema.to_vec(); - self.names = names.iter().map(|n| n.as_ref().to_string()).collect(); - let fields = self - .schema - .iter() - .zip(&self.names) - .map(|(&dt, h)| Ok(Realize::::realize(dt)?(h.as_str()))) - .collect::>>()?; - self.arrow_schema = Arc::new(Schema::from(fields)); - } - - #[throws(Arrow2DestinationError)] - fn partition(&mut self, counts: usize) -> Vec> { - let mut partitions = vec![]; - for _ in 0..counts { - partitions.push(ArrowPartitionWriter::new( - self.schema.clone(), - Arc::clone(&self.data), - )?); - } - partitions - } - - fn schema(&self) -> &[Arrow2TypeSystem] { - self.schema.as_slice() - } -} - -impl Arrow2Destination { - #[throws(Arrow2DestinationError)] - pub fn arrow(self) -> (Vec>>, Arc) { - let lock = Arc::try_unwrap(self.data).map_err(|_| anyhow!("Partitions are not freed"))?; - ( - lock.into_inner() - .map_err(|e| anyhow!("mutex poisoned {}", e))?, - self.arrow_schema, - ) - } - - #[throws(Arrow2DestinationError)] - pub fn polars(self) -> DataFrame { - let (rbs, schema): (Vec>>, Arc) = self.arrow()?; - //let fields = schema.fields.as_slice(); - let fields: &[Field] = schema.fields.as_slice(); - - // This should be in polars but their version needs updating. - // Whave placed this here contained in an inner function until the fix is merged upstream - fn try_from( - chunks: (Vec>>, &[Field]), - ) -> std::result::Result { - use polars_old::prelude::NamedFrom; - - let mut series: Vec = vec![]; - - for chunk in chunks.0.into_iter() { - let columns_results: std::result::Result, PolarsError> = chunk - .into_arrays() - .into_iter() - .zip(chunks.1) - .map(|(arr, field)| { - let a = Series::try_from((field.name.as_str(), arr)).map_err(|_| { - PolarsError::ComputeError("Couldn't build Series from box".into()) - }); - a - }) - .collect(); - - let columns = columns_results?; - - if series.is_empty() { - for col in columns.iter() { - let name = col.name().to_string(); - series.push(Series::new(&name, col)); - } - continue; - } - - for (i, col) in columns.into_iter().enumerate() { - series[i].append(&col)?; - } - } - - DataFrame::new(series) - } - - try_from((rbs, fields)).unwrap() - } -} - -pub struct ArrowPartitionWriter { - schema: Vec, - builders: Option, - current_row: usize, - current_col: usize, - data: ChunkBuffer, -} - -impl ArrowPartitionWriter { - #[throws(Arrow2DestinationError)] - fn new(schema: Vec, data: ChunkBuffer) -> Self { - let mut pw = ArrowPartitionWriter { - schema, - builders: None, - current_row: 0, - current_col: 0, - data, - }; - pw.allocate()?; - pw - } - - #[throws(Arrow2DestinationError)] - fn allocate(&mut self) { - let builders = self - .schema - .iter() - .map(|&dt| Ok(Realize::::realize(dt)?(RECORD_BATCH_SIZE))) - .collect::>>()?; - self.builders.replace(builders); - } - - #[throws(Arrow2DestinationError)] - fn flush(&mut self) { - let builders = self - .builders - .take() - .unwrap_or_else(|| panic!("arrow builder is none when flush!")); - - let columns = builders - .into_iter() - .zip(self.schema.iter()) - .map(|(builder, &dt)| Realize::::realize(dt)?(builder)) - .collect::>, crate::errors::ConnectorXError>>( - )?; - - let rb = Chunk::try_new(columns)?; - { - let mut guard = self - .data - .lock() - .map_err(|e| anyhow!("mutex poisoned {}", e))?; - let inner_data = &mut *guard; - inner_data.push(rb); - } - self.current_row = 0; - self.current_col = 0; - } -} - -impl<'a> DestinationPartition<'a> for ArrowPartitionWriter { - type TypeSystem = Arrow2TypeSystem; - type Error = Arrow2DestinationError; - - fn ncols(&self) -> usize { - self.schema.len() - } - - #[throws(Arrow2DestinationError)] - fn finalize(&mut self) { - if self.builders.is_some() { - self.flush()?; - } - } - - #[throws(Arrow2DestinationError)] - fn aquire_row(&mut self, _n: usize) -> usize { - self.current_row - } -} - -impl<'a, T> Consume for ArrowPartitionWriter -where - T: TypeAssoc<>::TypeSystem> + ArrowAssoc + 'static, -{ - type Error = Arrow2DestinationError; - - #[throws(Arrow2DestinationError)] - fn consume(&mut self, value: T) { - let col = self.current_col; - self.current_col = (self.current_col + 1) % self.ncols(); - self.schema[col].check::()?; - - match &mut self.builders { - Some(builders) => { - ::push( - builders[col] - .as_mut_any() - .downcast_mut::() - .ok_or_else(|| anyhow!("cannot cast arrow builder for append"))?, - value, - ); - } - None => throw!(anyhow!("arrow arrays are empty!")), - } - - // flush if exceed batch_size - if self.current_col == 0 { - self.current_row += 1; - if self.current_row >= RECORD_BATCH_SIZE { - self.flush()?; - self.allocate()?; - } - } - } -} diff --git a/connectorx/src/destinations/arrow2/typesystem.rs b/connectorx/src/destinations/arrow2/typesystem.rs deleted file mode 100644 index 5aa4a60845..0000000000 --- a/connectorx/src/destinations/arrow2/typesystem.rs +++ /dev/null @@ -1,69 +0,0 @@ -use crate::impl_typesystem; -use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; - -#[derive(Debug, Clone, Copy)] -pub struct DateTimeWrapperMicro(pub DateTime); - -#[derive(Debug, Clone, Copy)] -pub struct NaiveTimeWrapperMicro(pub NaiveTime); - -#[derive(Debug, Clone, Copy)] -pub struct NaiveDateTimeWrapperMicro(pub NaiveDateTime); - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum Arrow2TypeSystem { - Int32(bool), - Int64(bool), - UInt32(bool), - UInt64(bool), - Float32(bool), - Float64(bool), - Boolean(bool), - LargeUtf8(bool), - LargeBinary(bool), - Date32(bool), - Date64(bool), - Date64Micro(bool), - Time64(bool), - Time64Micro(bool), - DateTimeTz(bool), - DateTimeTzMicro(bool), - BoolArray(bool), - Int32Array(bool), - Int64Array(bool), - UInt32Array(bool), - UInt64Array(bool), - Float32Array(bool), - Float64Array(bool), - Utf8Array(bool), -} - -impl_typesystem! { - system = Arrow2TypeSystem, - mappings = { - { Int32 => i32 } - { Int64 => i64 } - { UInt32 => u32 } - { UInt64 => u64 } - { Float64 => f64 } - { Float32 => f32 } - { Boolean => bool } - { LargeUtf8 => String } - { LargeBinary => Vec } - { Date32 => NaiveDate } - { Date64 => NaiveDateTime } - { Date64Micro => NaiveDateTimeWrapperMicro } - { Time64 => NaiveTime } - { Time64Micro => NaiveTimeWrapperMicro } - { DateTimeTz => DateTime } - { DateTimeTzMicro => DateTimeWrapperMicro } - { BoolArray => Vec } - { Int32Array => Vec } - { Int64Array => Vec } - { UInt32Array => Vec } - { UInt64Array => Vec } - { Float32Array => Vec } - { Float64Array => Vec } - { Utf8Array => Vec } - } -} diff --git a/connectorx/src/destinations/mod.rs b/connectorx/src/destinations/mod.rs index 0082b9d846..4cce75d683 100644 --- a/connectorx/src/destinations/mod.rs +++ b/connectorx/src/destinations/mod.rs @@ -6,9 +6,6 @@ pub mod arrow; #[cfg(feature = "dst_arrow")] pub mod arrowstream; -#[cfg(feature = "dst_arrow2")] -pub mod arrow2; - use crate::data_order::DataOrder; use crate::errors::ConnectorXError; use crate::typesystem::{TypeAssoc, TypeSystem}; diff --git a/connectorx/src/errors.rs b/connectorx/src/errors.rs index f0719bb75e..f06bd4aecc 100644 --- a/connectorx/src/errors.rs +++ b/connectorx/src/errors.rs @@ -94,66 +94,34 @@ pub enum ConnectorXOutError { #[error(transparent)] ArrowStreamError(#[from] crate::destinations::arrowstream::ArrowDestinationError), - #[cfg(feature = "dst_arrow2")] - #[error(transparent)] - Arrow2Error(#[from] crate::destinations::arrow2::Arrow2DestinationError), - #[cfg(all(feature = "src_postgres", feature = "dst_arrow"))] #[error(transparent)] PostgresArrowTransportError(#[from] crate::transports::PostgresArrowTransportError), - #[cfg(all(feature = "src_postgres", feature = "dst_arrow2"))] - #[error(transparent)] - PostgresArrow2TransportError(#[from] crate::transports::PostgresArrow2TransportError), - #[cfg(all(feature = "src_mysql", feature = "dst_arrow"))] #[error(transparent)] MySQLArrowTransportError(#[from] crate::transports::MySQLArrowTransportError), - #[cfg(all(feature = "src_mysql", feature = "dst_arrow2"))] - #[error(transparent)] - MySQLArrow2TransportError(#[from] crate::transports::MySQLArrow2TransportError), - #[cfg(all(feature = "src_sqlite", feature = "dst_arrow"))] #[error(transparent)] SQLiteArrowTransportError(#[from] crate::transports::SQLiteArrowTransportError), - #[cfg(all(feature = "src_sqlite", feature = "dst_arrow2"))] - #[error(transparent)] - SQLiteArrow2TransportError(#[from] crate::transports::SQLiteArrow2TransportError), - #[cfg(all(feature = "src_mssql", feature = "dst_arrow"))] #[error(transparent)] MsSQLArrowTransportError(#[from] crate::transports::MsSQLArrowTransportError), - #[cfg(all(feature = "src_mssql", feature = "dst_arrow2"))] - #[error(transparent)] - MsSQLArrow2TransportError(#[from] crate::transports::MsSQLArrow2TransportError), - #[cfg(all(feature = "src_oracle", feature = "dst_arrow"))] #[error(transparent)] OracleArrowTransportError(#[from] crate::transports::OracleArrowTransportError), - #[cfg(all(feature = "src_oracle", feature = "dst_arrow2"))] - #[error(transparent)] - OracleArrow2TransportError(#[from] crate::transports::OracleArrow2TransportError), - #[cfg(all(feature = "src_bigquery", feature = "dst_arrow"))] #[error(transparent)] BigqueryArrowTransportError(#[from] crate::transports::BigQueryArrowTransportError), - #[cfg(all(feature = "src_bigquery", feature = "dst_arrow2"))] - #[error(transparent)] - BigqueryArrow2TransportError(#[from] crate::transports::BigQueryArrow2TransportError), - #[cfg(all(feature = "src_trino", feature = "dst_arrow"))] #[error(transparent)] TrinoArrowTransportError(#[from] crate::transports::TrinoArrowTransportError), - #[cfg(all(feature = "src_trino", feature = "dst_arrow2"))] - #[error(transparent)] - TrinoArrow2TransportError(#[from] crate::transports::TrinoArrow2TransportError), - /// Any other errors that are too trivial to be put here explicitly. #[error(transparent)] Other(#[from] anyhow::Error), diff --git a/connectorx/src/get_arrow2.rs b/connectorx/src/get_arrow2.rs deleted file mode 100644 index f69fc9b697..0000000000 --- a/connectorx/src/get_arrow2.rs +++ /dev/null @@ -1,243 +0,0 @@ -#[cfg(feature = "src_mysql")] -use crate::sources::mysql::{BinaryProtocol as MySQLBinaryProtocol, TextProtocol}; -#[cfg(feature = "src_postgres")] -use crate::sources::postgres::{ - rewrite_tls_args, BinaryProtocol as PgBinaryProtocol, CSVProtocol, CursorProtocol, - SimpleProtocol, -}; -use crate::{prelude::*, sql::CXQuery}; -use fehler::{throw, throws}; -use log::debug; -#[cfg(feature = "src_postgres")] -use postgres::NoTls; -#[cfg(feature = "src_postgres")] -use postgres_openssl::MakeTlsConnector; -#[allow(unused_imports)] -use std::sync::Arc; - -#[allow(unreachable_code, unreachable_patterns, unused_variables, unused_mut)] -#[throws(ConnectorXOutError)] -pub fn get_arrow2( - source_conn: &SourceConn, - origin_query: Option, - queries: &[CXQuery], -) -> Arrow2Destination { - let mut destination = Arrow2Destination::new(); - let protocol = source_conn.proto.as_str(); - debug!("Protocol: {}", protocol); - - match source_conn.ty { - #[cfg(feature = "src_postgres")] - SourceType::Postgres => { - let (config, tls) = rewrite_tls_args(&source_conn.conn)?; - match (protocol, tls) { - ("csv", Some(tls_conn)) => { - let sb = PostgresSource::::new( - config, - tls_conn, - queries.len(), - )?; - let dispatcher = Dispatcher::< - _, - _, - PostgresArrow2Transport, - >::new( - sb, &mut destination, queries, origin_query - ); - dispatcher.run()?; - } - ("csv", None) => { - let sb = - PostgresSource::::new(config, NoTls, queries.len())?; - let dispatcher = - Dispatcher::<_, _, PostgresArrow2Transport>::new( - sb, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - ("binary", Some(tls_conn)) => { - let sb = PostgresSource::::new( - config, - tls_conn, - queries.len(), - )?; - let dispatcher = - Dispatcher::< - _, - _, - PostgresArrow2Transport, - >::new(sb, &mut destination, queries, origin_query); - dispatcher.run()?; - } - ("binary", None) => { - let sb = PostgresSource::::new( - config, - NoTls, - queries.len(), - )?; - let dispatcher = Dispatcher::< - _, - _, - PostgresArrow2Transport, - >::new( - sb, &mut destination, queries, origin_query - ); - dispatcher.run()?; - } - ("cursor", Some(tls_conn)) => { - let sb = PostgresSource::::new( - config, - tls_conn, - queries.len(), - )?; - let dispatcher = Dispatcher::< - _, - _, - PostgresArrow2Transport, - >::new( - sb, &mut destination, queries, origin_query - ); - dispatcher.run()?; - } - ("cursor", None) => { - let sb = - PostgresSource::::new(config, NoTls, queries.len())?; - let dispatcher = Dispatcher::< - _, - _, - PostgresArrow2Transport, - >::new( - sb, &mut destination, queries, origin_query - ); - dispatcher.run()?; - } - ("simple", Some(tls_conn)) => { - let sb = PostgresSource::::new( - config, - tls_conn, - queries.len(), - )?; - let dispatcher = Dispatcher::< - _, - _, - PostgresArrow2Transport, - >::new( - sb, &mut destination, queries, origin_query - ); - debug!("Running dispatcher"); - dispatcher.run()?; - } - ("simple", None) => { - let sb = - PostgresSource::::new(config, NoTls, queries.len())?; - let dispatcher = Dispatcher::< - _, - _, - PostgresArrow2Transport, - >::new( - sb, &mut destination, queries, origin_query - ); - debug!("Running dispatcher"); - dispatcher.run()?; - } - - _ => unimplemented!("{} protocol not supported", protocol), - } - } - #[cfg(feature = "src_mysql")] - SourceType::MySQL => match protocol { - "binary" => { - let source = - MySQLSource::::new(&source_conn.conn[..], queries.len())?; - let dispatcher = Dispatcher::<_, _, MySQLArrow2Transport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - "text" => { - let source = - MySQLSource::::new(&source_conn.conn[..], queries.len())?; - let dispatcher = Dispatcher::<_, _, MySQLArrow2Transport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - _ => unimplemented!("{} protocol not supported", protocol), - }, - #[cfg(feature = "src_sqlite")] - SourceType::SQLite => { - // remove the first "sqlite://" manually since url.path is not correct for windows - let path = &source_conn.conn.as_str()[9..]; - let source = SQLiteSource::new(path, queries.len())?; - let dispatcher = Dispatcher::<_, _, SQLiteArrow2Transport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - #[cfg(feature = "src_mssql")] - SourceType::MsSQL => { - let rt = Arc::new(tokio::runtime::Runtime::new().expect("Failed to create runtime")); - let source = MsSQLSource::new(rt, &source_conn.conn[..], queries.len())?; - let dispatcher = Dispatcher::<_, _, MsSQLArrow2Transport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - #[cfg(feature = "src_oracle")] - SourceType::Oracle => { - let source = OracleSource::new(&source_conn.conn[..], queries.len())?; - let dispatcher = Dispatcher::<_, _, OracleArrow2Transport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - #[cfg(feature = "src_bigquery")] - SourceType::BigQuery => { - let rt = Arc::new(tokio::runtime::Runtime::new().expect("Failed to create runtime")); - let source = BigQuerySource::new(rt, &source_conn.conn[..])?; - let dispatcher = Dispatcher::<_, _, BigQueryArrow2Transport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - #[cfg(feature = "src_trino")] - SourceType::Trino => { - let rt = Arc::new(tokio::runtime::Runtime::new().expect("Failed to create runtime")); - let source = TrinoSource::new(rt, &source_conn.conn[..])?; - let dispatcher = Dispatcher::<_, _, TrinoArrow2Transport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - _ => throw!(ConnectorXOutError::SourceNotSupport(format!( - "{:?}", - source_conn.ty - ))), - } - - destination -} diff --git a/connectorx/src/lib.rs b/connectorx/src/lib.rs index 5b4ce73868..a9c963af9a 100644 --- a/connectorx/src/lib.rs +++ b/connectorx/src/lib.rs @@ -100,7 +100,7 @@ //! let data = destination.arrow(); //! ``` //! -//! Or simply you can directly use the [`get_arrow::get_arrow`] or [`get_arrow2::get_arrow2`] in which we wrapped the above procedures: +//! Or simply you can directly use the [`get_arrow::get_arrow`] in which we wrapped the above procedures: //! //! ```no_run //! use connectorx::prelude::*; @@ -133,11 +133,11 @@ //! //! ## Destinations //! - [x] Arrow -//! - [x] Arrow2 +//! - [x] Polars //! //! # Feature gates //! By default, ConnectorX does not enable any sources / destinations to keep the dependencies minimal. -//! Instead, we provide following features for you to opt-in: `src_sqlite`, `src_postgres`, `src_mysql`, `src_mssql`, `src_oracle`, `dst_arrow`, `dst_arrow2`. +//! Instead, we provide following features for you to opt-in: `src_sqlite`, `src_postgres`, `src_mysql`, `src_mssql`, `src_oracle`, `dst_arrow`, `dst_polars`. //! For example, if you'd like to load data from Postgres to Arrow, you can enable `src_postgres` and `dst_arrow` in `Cargo.toml`. //! This will enable [`sources::postgres`], [`destinations::arrow`] and [`transports::PostgresArrowTransport`]. @@ -157,8 +157,6 @@ pub mod fed_dispatcher; pub mod fed_rewriter; #[cfg(feature = "dst_arrow")] pub mod get_arrow; -#[cfg(feature = "dst_arrow2")] -pub mod get_arrow2; pub mod partition; pub mod source_router; pub mod sources; @@ -174,8 +172,6 @@ pub mod prelude { pub use crate::data_order::{coordinate, DataOrder}; #[cfg(feature = "dst_arrow")] pub use crate::destinations::arrow::{ArrowDestination, ArrowPartitionWriter, ArrowTypeSystem}; - #[cfg(feature = "dst_arrow2")] - pub use crate::destinations::arrow2::Arrow2Destination; #[cfg(feature = "dst_arrow")] pub use crate::destinations::arrowstream::{ ArrowDestination as ArrowStreamDestination, @@ -189,8 +185,6 @@ pub mod prelude { pub use crate::fed_rewriter::{rewrite_sql, FederatedDataSourceInfo, Plan}; #[cfg(feature = "dst_arrow")] pub use crate::get_arrow::{get_arrow, new_record_batch_iter}; - #[cfg(feature = "dst_arrow2")] - pub use crate::get_arrow2::get_arrow2; pub use crate::source_router::*; #[cfg(feature = "src_bigquery")] pub use crate::sources::bigquery::BigQuerySource; diff --git a/connectorx/src/transports/bigquery_arrow2.rs b/connectorx/src/transports/bigquery_arrow2.rs deleted file mode 100644 index ef5308b463..0000000000 --- a/connectorx/src/transports/bigquery_arrow2.rs +++ /dev/null @@ -1,50 +0,0 @@ -//! Transport from BigQuery Source to Arrow Destination. - -use crate::{ - destinations::arrow2::{ - typesystem::Arrow2TypeSystem, Arrow2Destination, Arrow2DestinationError, - }, - impl_transport, - sources::bigquery::{BigQuerySource, BigQuerySourceError, BigQueryTypeSystem}, - typesystem::TypeConversion, -}; -use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; -use thiserror::Error; - -#[derive(Error, Debug)] -pub enum BigQueryArrow2TransportError { - #[error(transparent)] - Source(#[from] BigQuerySourceError), - - #[error(transparent)] - Destination(#[from] Arrow2DestinationError), - - #[error(transparent)] - ConnectorX(#[from] crate::errors::ConnectorXError), -} - -/// Convert BigQuery data types to Arrow data types. -pub struct BigQueryArrow2Transport; - -impl_transport!( - name = BigQueryArrow2Transport, - error = BigQueryArrow2TransportError, - systems = BigQueryTypeSystem => Arrow2TypeSystem, - route = BigQuerySource => Arrow2Destination, - mappings = { - { Bool[bool] => Boolean[bool] | conversion auto } - { Boolean[bool] => Boolean[bool] | conversion none } - { Int64[i64] => Int64[i64] | conversion auto } - { Integer[i64] => Int64[i64] | conversion none } - { Float64[f64] => Float64[f64] | conversion auto } - { Float[f64] => Float64[f64] | conversion none } - { Numeric[f64] => Float64[f64] | conversion none } - { Bignumeric[f64] => Float64[f64] | conversion none } - { String[String] => LargeUtf8[String] | conversion auto } - { Bytes[String] => LargeUtf8[String] | conversion none } - { Date[NaiveDate] => Date32[NaiveDate] | conversion auto } - { Datetime[NaiveDateTime] => Date64[NaiveDateTime] | conversion auto } - { Time[NaiveTime] => Time64[NaiveTime] | conversion auto } - { Timestamp[DateTime] => DateTimeTz[DateTime] | conversion auto } - } -); diff --git a/connectorx/src/transports/dummy_arrow2.rs b/connectorx/src/transports/dummy_arrow2.rs deleted file mode 100644 index 1d8d484181..0000000000 --- a/connectorx/src/transports/dummy_arrow2.rs +++ /dev/null @@ -1,55 +0,0 @@ -//! Transport from Dummy Source to Arrow2 Destination. - -use crate::destinations::arrow2::{Arrow2Destination, Arrow2DestinationError, Arrow2TypeSystem}; -use crate::sources::dummy::{DummySource, DummyTypeSystem}; -use crate::typesystem::TypeConversion; -use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc}; -use thiserror::Error; - -/// Convert Dummy data types to Arrow2 data types. -pub struct DummyArrow2Transport; - -#[derive(Error, Debug)] -pub enum DummyArrow2TransportError { - #[error(transparent)] - Destination(#[from] Arrow2DestinationError), - - #[error(transparent)] - ConnectorX(#[from] crate::errors::ConnectorXError), -} - -impl_transport!( - name = DummyArrow2Transport, - error = DummyArrow2TransportError, - systems = DummyTypeSystem => Arrow2TypeSystem, - route = DummySource => Arrow2Destination, - mappings = { - { F64[f64] => Float64[f64] | conversion auto} - { I64[i64] => Int64[i64] | conversion auto} - { Bool[bool] => Boolean[bool] | conversion auto} - { String[String] => LargeUtf8[String] | conversion auto} - { DateTime[DateTime] => Date64[NaiveDateTime] | conversion option} - } -); - -impl TypeConversion, NaiveDateTime> for DummyArrow2Transport { - fn convert(val: DateTime) -> NaiveDateTime { - val.naive_utc() - } -} - -impl TypeConversion> for DummyArrow2Transport { - fn convert(val: NaiveDateTime) -> DateTime { - DateTime::from_naive_utc_and_offset(val, Utc) - } -} - -impl TypeConversion> for DummyArrow2Transport { - fn convert(val: NaiveDate) -> DateTime { - DateTime::from_naive_utc_and_offset( - val.and_hms_opt(0, 0, 0) - .unwrap_or_else(|| panic!("from_hms_opt return None")), - Utc, - ) - } -} diff --git a/connectorx/src/transports/mod.rs b/connectorx/src/transports/mod.rs index 96f90db44e..85d3a47b39 100644 --- a/connectorx/src/transports/mod.rs +++ b/connectorx/src/transports/mod.rs @@ -2,58 +2,40 @@ #[cfg(all(feature = "src_bigquery", feature = "dst_arrow"))] mod bigquery_arrow; -#[cfg(all(feature = "src_bigquery", feature = "dst_arrow2"))] -mod bigquery_arrow2; #[cfg(all(feature = "src_bigquery", feature = "dst_arrow"))] mod bigquery_arrowstream; #[cfg(all(feature = "src_csv", feature = "dst_arrow"))] mod csv_arrow; #[cfg(all(feature = "src_dummy", feature = "dst_arrow"))] mod dummy_arrow; -#[cfg(all(feature = "src_dummy", feature = "dst_arrow2"))] -mod dummy_arrow2; #[cfg(all(feature = "src_dummy", feature = "dst_arrow"))] mod dummy_arrowstream; #[cfg(all(feature = "src_mssql", feature = "dst_arrow"))] mod mssql_arrow; -#[cfg(all(feature = "src_mssql", feature = "dst_arrow2"))] -mod mssql_arrow2; #[cfg(all(feature = "src_mssql", feature = "dst_arrow"))] mod mssql_arrowstream; #[cfg(all(feature = "src_mysql", feature = "dst_arrow"))] mod mysql_arrow; -#[cfg(all(feature = "src_mysql", feature = "dst_arrow2"))] -mod mysql_arrow2; #[cfg(all(feature = "src_mysql", feature = "dst_arrow"))] mod mysql_arrowstream; #[cfg(all(feature = "src_oracle", feature = "dst_arrow"))] mod oracle_arrow; -#[cfg(all(feature = "src_oracle", feature = "dst_arrow2"))] -mod oracle_arrow2; #[cfg(all(feature = "src_oracle", feature = "dst_arrow"))] mod oracle_arrowstream; #[cfg(all(feature = "src_postgres", feature = "dst_arrow"))] mod postgres_arrow; -#[cfg(all(feature = "src_postgres", feature = "dst_arrow2"))] -mod postgres_arrow2; #[cfg(all(feature = "src_postgres", feature = "dst_arrow"))] mod postgres_arrowstream; #[cfg(all(feature = "src_sqlite", feature = "dst_arrow"))] mod sqlite_arrow; -#[cfg(all(feature = "src_sqlite", feature = "dst_arrow2"))] -mod sqlite_arrow2; #[cfg(all(feature = "src_sqlite", feature = "dst_arrow"))] mod sqlite_arrowstream; #[cfg(all(feature = "src_trino", feature = "dst_arrow"))] mod trino_arrow; -#[cfg(all(feature = "src_trino", feature = "dst_arrow2"))] -mod trino_arrow2; #[cfg(all(feature = "src_trino", feature = "dst_arrow"))] mod trino_arrowstream; #[cfg(all(feature = "src_bigquery", feature = "dst_arrow"))] pub use bigquery_arrow::{BigQueryArrowTransport, BigQueryArrowTransportError}; -#[cfg(all(feature = "src_bigquery", feature = "dst_arrow2"))] -pub use bigquery_arrow2::{BigQueryArrow2Transport, BigQueryArrow2TransportError}; #[cfg(all(feature = "src_bigquery", feature = "dst_arrow"))] pub use bigquery_arrowstream::{ BigQueryArrowTransport as BigQueryArrowStreamTransport, @@ -63,12 +45,8 @@ pub use bigquery_arrowstream::{ pub use csv_arrow::CSVArrowTransport; #[cfg(all(feature = "src_dummy", feature = "dst_arrow"))] pub use dummy_arrow::DummyArrowTransport; -#[cfg(all(feature = "src_dummy", feature = "dst_arrow2"))] -pub use dummy_arrow2::DummyArrow2Transport; #[cfg(all(feature = "src_mssql", feature = "dst_arrow"))] pub use mssql_arrow::{MsSQLArrowTransport, MsSQLArrowTransportError}; -#[cfg(all(feature = "src_mssql", feature = "dst_arrow2"))] -pub use mssql_arrow2::{MsSQLArrow2Transport, MsSQLArrow2TransportError}; #[cfg(all(feature = "src_mssql", feature = "dst_arrow"))] pub use mssql_arrowstream::{ MsSQLArrowTransport as MsSQLArrowStreamTransport, @@ -76,8 +54,6 @@ pub use mssql_arrowstream::{ }; #[cfg(all(feature = "src_mysql", feature = "dst_arrow"))] pub use mysql_arrow::{MySQLArrowTransport, MySQLArrowTransportError}; -#[cfg(all(feature = "src_mysql", feature = "dst_arrow2"))] -pub use mysql_arrow2::{MySQLArrow2Transport, MySQLArrow2TransportError}; #[cfg(all(feature = "src_mysql", feature = "dst_arrow"))] pub use mysql_arrowstream::{ MySQLArrowTransport as MySQLArrowStreamTransport, @@ -85,8 +61,6 @@ pub use mysql_arrowstream::{ }; #[cfg(all(feature = "src_oracle", feature = "dst_arrow"))] pub use oracle_arrow::{OracleArrowTransport, OracleArrowTransportError}; -#[cfg(all(feature = "src_oracle", feature = "dst_arrow2"))] -pub use oracle_arrow2::{OracleArrow2Transport, OracleArrow2TransportError}; #[cfg(all(feature = "src_oracle", feature = "dst_arrow"))] pub use oracle_arrowstream::{ OracleArrowTransport as OracleArrowStreamTransport, @@ -94,8 +68,6 @@ pub use oracle_arrowstream::{ }; #[cfg(all(feature = "src_postgres", feature = "dst_arrow"))] pub use postgres_arrow::{PostgresArrowTransport, PostgresArrowTransportError}; -#[cfg(all(feature = "src_postgres", feature = "dst_arrow2"))] -pub use postgres_arrow2::{PostgresArrow2Transport, PostgresArrow2TransportError}; #[cfg(all(feature = "src_postgres", feature = "dst_arrow"))] pub use postgres_arrowstream::{ PostgresArrowTransport as PostgresArrowStreamTransport, @@ -103,8 +75,6 @@ pub use postgres_arrowstream::{ }; #[cfg(all(feature = "src_sqlite", feature = "dst_arrow"))] pub use sqlite_arrow::{SQLiteArrowTransport, SQLiteArrowTransportError}; -#[cfg(all(feature = "src_sqlite", feature = "dst_arrow2"))] -pub use sqlite_arrow2::{SQLiteArrow2Transport, SQLiteArrow2TransportError}; #[cfg(all(feature = "src_sqlite", feature = "dst_arrow"))] pub use sqlite_arrowstream::{ SQLiteArrowTransport as SQLiteArrowStreamTransport, @@ -112,8 +82,6 @@ pub use sqlite_arrowstream::{ }; #[cfg(all(feature = "src_trino", feature = "dst_arrow"))] pub use trino_arrow::{TrinoArrowTransport, TrinoArrowTransportError}; -#[cfg(all(feature = "src_trino", feature = "dst_arrow2"))] -pub use trino_arrow2::{TrinoArrow2Transport, TrinoArrow2TransportError}; #[cfg(all(feature = "src_trino", feature = "dst_arrow"))] pub use trino_arrowstream::{ TrinoArrowTransport as TrinoArrowStreamTransport, diff --git a/connectorx/src/transports/mssql_arrow2.rs b/connectorx/src/transports/mssql_arrow2.rs deleted file mode 100644 index 5464367532..0000000000 --- a/connectorx/src/transports/mssql_arrow2.rs +++ /dev/null @@ -1,109 +0,0 @@ -//! Transport from MsSQL Source to Arrow2 Destination. - -use crate::destinations::arrow2::{ - typesystem::{DateTimeWrapperMicro, NaiveDateTimeWrapperMicro, NaiveTimeWrapperMicro}, - Arrow2Destination, Arrow2DestinationError, Arrow2TypeSystem, -}; -use crate::sources::mssql::{FloatN, IntN, MsSQLSource, MsSQLSourceError, MsSQLTypeSystem}; -use crate::typesystem::TypeConversion; -use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; -use num_traits::ToPrimitive; -use rust_decimal::Decimal; -use thiserror::Error; -use uuid::Uuid; - -/// Convert MsSQL data types to Arrow2 data types. -pub struct MsSQLArrow2Transport; - -#[derive(Error, Debug)] -pub enum MsSQLArrow2TransportError { - #[error(transparent)] - Source(#[from] MsSQLSourceError), - - #[error(transparent)] - Destination(#[from] Arrow2DestinationError), - - #[error(transparent)] - ConnectorX(#[from] crate::errors::ConnectorXError), -} - -impl_transport!( - name = MsSQLArrow2Transport, - error = MsSQLArrow2TransportError, - systems = MsSQLTypeSystem => Arrow2TypeSystem, - route = MsSQLSource => Arrow2Destination, - mappings = { - { Tinyint[u8] => Int32[i32] | conversion auto } - { Smallint[i16] => Int32[i32] | conversion auto } - { Int[i32] => Int32[i32] | conversion auto } - { Bigint[i64] => Int64[i64] | conversion auto } - { Intn[IntN] => Int64[i64] | conversion option } - { Float24[f32] => Float32[f32] | conversion auto } - { Float53[f64] => Float64[f64] | conversion auto } - { Floatn[FloatN] => Float64[f64] | conversion option } - { Bit[bool] => Boolean[bool] | conversion auto } - { Nvarchar[&'r str] => LargeUtf8[String] | conversion owned } - { Varchar[&'r str] => LargeUtf8[String] | conversion none } - { Nchar[&'r str] => LargeUtf8[String] | conversion none } - { Char[&'r str] => LargeUtf8[String] | conversion none } - { Text[&'r str] => LargeUtf8[String] | conversion none } - { Ntext[&'r str] => LargeUtf8[String] | conversion none } - { Binary[&'r [u8]] => LargeBinary[Vec] | conversion owned } - { Varbinary[&'r [u8]] => LargeBinary[Vec] | conversion none } - { Image[&'r [u8]] => LargeBinary[Vec] | conversion none } - { Numeric[Decimal] => Float64[f64] | conversion option } - { Decimal[Decimal] => Float64[f64] | conversion none } - { Datetime[NaiveDateTime] => Date64Micro[NaiveDateTimeWrapperMicro] | conversion option } - { Datetime2[NaiveDateTime] => Date64Micro[NaiveDateTimeWrapperMicro] | conversion none } - { Smalldatetime[NaiveDateTime] => Date64Micro[NaiveDateTimeWrapperMicro] | conversion none } - { Date[NaiveDate] => Date32[NaiveDate] | conversion auto } - { Datetimeoffset[DateTime] => DateTimeTzMicro[DateTimeWrapperMicro] | conversion option } - { Uniqueidentifier[Uuid] => LargeUtf8[String] | conversion option } - { Time[NaiveTime] => Time64Micro[NaiveTimeWrapperMicro] | conversion option } - { SmallMoney[f32] => Float32[f32] | conversion none } - { Money[f64] => Float64[f64] | conversion none } - } -); - -impl TypeConversion for MsSQLArrow2Transport { - fn convert(val: NaiveTime) -> NaiveTimeWrapperMicro { - NaiveTimeWrapperMicro(val) - } -} - -impl TypeConversion for MsSQLArrow2Transport { - fn convert(val: NaiveDateTime) -> NaiveDateTimeWrapperMicro { - NaiveDateTimeWrapperMicro(val) - } -} - -impl TypeConversion, DateTimeWrapperMicro> for MsSQLArrow2Transport { - fn convert(val: DateTime) -> DateTimeWrapperMicro { - DateTimeWrapperMicro(val) - } -} - -impl TypeConversion for MsSQLArrow2Transport { - fn convert(val: Uuid) -> String { - val.to_string() - } -} - -impl TypeConversion for MsSQLArrow2Transport { - fn convert(val: IntN) -> i64 { - val.0 - } -} - -impl TypeConversion for MsSQLArrow2Transport { - fn convert(val: FloatN) -> f64 { - val.0 - } -} - -impl TypeConversion for MsSQLArrow2Transport { - fn convert(val: Decimal) -> f64 { - val.to_f64() - .unwrap_or_else(|| panic!("cannot convert decimal {:?} to float64", val)) - } -} diff --git a/connectorx/src/transports/mysql_arrow2.rs b/connectorx/src/transports/mysql_arrow2.rs deleted file mode 100644 index 2e1c57c795..0000000000 --- a/connectorx/src/transports/mysql_arrow2.rs +++ /dev/null @@ -1,131 +0,0 @@ -//! Transport from MySQL Source to Arrow2 Destination. - -use crate::{ - destinations::arrow2::{ - typesystem::{Arrow2TypeSystem, NaiveDateTimeWrapperMicro, NaiveTimeWrapperMicro}, - Arrow2Destination, Arrow2DestinationError, - }, - impl_transport, - sources::mysql::{ - BinaryProtocol, MySQLSource, MySQLSourceError, MySQLTypeSystem, TextProtocol, - }, - typesystem::TypeConversion, -}; -use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; -use num_traits::ToPrimitive; -use rust_decimal::Decimal; -use serde_json::{to_string, Value}; -use std::marker::PhantomData; -use thiserror::Error; - -#[derive(Error, Debug)] -pub enum MySQLArrow2TransportError { - #[error(transparent)] - Source(#[from] MySQLSourceError), - - #[error(transparent)] - Destination(#[from] Arrow2DestinationError), - - #[error(transparent)] - ConnectorX(#[from] crate::errors::ConnectorXError), -} - -/// Convert MySQL data types to Arrow2 data types. -pub struct MySQLArrow2Transport

(PhantomData

); - -impl_transport!( - name = MySQLArrow2Transport, - error = MySQLArrow2TransportError, - systems = MySQLTypeSystem => Arrow2TypeSystem, - route = MySQLSource => Arrow2Destination, - mappings = { - { Float[f32] => Float64[f64] | conversion auto } - { Double[f64] => Float64[f64] | conversion auto } - { Tiny[i8] => Int64[i64] | conversion auto } - { Short[i16] => Int64[i64] | conversion auto } - { Int24[i32] => Int64[i64] | conversion none } - { Long[i32] => Int64[i64] | conversion auto } - { LongLong[i64] => Int64[i64] | conversion auto } - { UTiny[u8] => Int64[i64] | conversion auto } - { UShort[u16] => Int64[i64] | conversion auto } - { ULong[u32] => Int64[i64] | conversion auto } - { UInt24[u32] => Int64[i64] | conversion none } - { ULongLong[u64] => Float64[f64] | conversion auto } - { Date[NaiveDate] => Date32[NaiveDate] | conversion auto } - { Time[NaiveTime] => Time64Micro[NaiveTimeWrapperMicro] | conversion option } - { Datetime[NaiveDateTime] => Date64Micro[NaiveDateTimeWrapperMicro] | conversion option } - { Year[i16] => Int64[i64] | conversion none} - { Timestamp[NaiveDateTime] => Date64Micro[NaiveDateTimeWrapperMicro] | conversion none } - { Decimal[Decimal] => Float64[f64] | conversion option } - { VarChar[String] => LargeUtf8[String] | conversion auto } - { Char[String] => LargeUtf8[String] | conversion none } - { Enum[String] => LargeUtf8[String] | conversion none } - { Json[Value] => LargeUtf8[String] | conversion option } - { TinyBlob[Vec] => LargeBinary[Vec] | conversion auto } - { Blob[Vec] => LargeBinary[Vec] | conversion none } - { MediumBlob[Vec] => LargeBinary[Vec] | conversion none } - { LongBlob[Vec] => LargeBinary[Vec] | conversion none } - { Bit[Vec] => LargeBinary[Vec] | conversion none } - } -); - -impl_transport!( - name = MySQLArrow2Transport, - error = MySQLArrow2TransportError, - systems = MySQLTypeSystem => Arrow2TypeSystem, - route = MySQLSource => Arrow2Destination, - mappings = { - { Float[f32] => Float64[f64] | conversion auto } - { Double[f64] => Float64[f64] | conversion auto } - { Tiny[i8] => Int64[i64] | conversion auto } - { Short[i16] => Int64[i64] | conversion auto } - { Int24[i32] => Int64[i64] | conversion none } - { Long[i32] => Int64[i64] | conversion auto } - { LongLong[i64] => Int64[i64] | conversion auto } - { UTiny[u8] => Int64[i64] | conversion auto } - { UShort[u16] => Int64[i64] | conversion auto } - { ULong[u32] => Int64[i64] | conversion auto } - { UInt24[u32] => Int64[i64] | conversion none } - { ULongLong[u64] => Float64[f64] | conversion auto } - { Date[NaiveDate] => Date32[NaiveDate] | conversion auto } - { Time[NaiveTime] => Time64Micro[NaiveTimeWrapperMicro] | conversion option } - { Datetime[NaiveDateTime] => Date64Micro[NaiveDateTimeWrapperMicro] | conversion option } - { Year[i16] => Int64[i64] | conversion none} - { Timestamp[NaiveDateTime] => Date64Micro[NaiveDateTimeWrapperMicro] | conversion none } - { Decimal[Decimal] => Float64[f64] | conversion option } - { VarChar[String] => LargeUtf8[String] | conversion auto } - { Char[String] => LargeUtf8[String] | conversion none } - { Enum[String] => LargeUtf8[String] | conversion none } - { Json[Value] => LargeUtf8[String] | conversion option } - { TinyBlob[Vec] => LargeBinary[Vec] | conversion auto } - { Blob[Vec] => LargeBinary[Vec] | conversion none } - { MediumBlob[Vec] => LargeBinary[Vec] | conversion none } - { LongBlob[Vec] => LargeBinary[Vec] | conversion none } - { Bit[Vec] => LargeBinary[Vec] | conversion none } - } -); - -impl

TypeConversion for MySQLArrow2Transport

{ - fn convert(val: NaiveTime) -> NaiveTimeWrapperMicro { - NaiveTimeWrapperMicro(val) - } -} - -impl

TypeConversion for MySQLArrow2Transport

{ - fn convert(val: NaiveDateTime) -> NaiveDateTimeWrapperMicro { - NaiveDateTimeWrapperMicro(val) - } -} - -impl

TypeConversion for MySQLArrow2Transport

{ - fn convert(val: Decimal) -> f64 { - val.to_f64() - .unwrap_or_else(|| panic!("cannot convert decimal {:?} to float64", val)) - } -} - -impl

TypeConversion for MySQLArrow2Transport

{ - fn convert(val: Value) -> String { - to_string(&val).unwrap() - } -} diff --git a/connectorx/src/transports/oracle_arrow2.rs b/connectorx/src/transports/oracle_arrow2.rs deleted file mode 100644 index fbb1a68e8c..0000000000 --- a/connectorx/src/transports/oracle_arrow2.rs +++ /dev/null @@ -1,62 +0,0 @@ -use crate::{ - destinations::arrow2::{ - typesystem::{Arrow2TypeSystem, DateTimeWrapperMicro, NaiveDateTimeWrapperMicro}, - Arrow2Destination, Arrow2DestinationError, - }, - impl_transport, - sources::oracle::{OracleSource, OracleSourceError, OracleTypeSystem}, - typesystem::TypeConversion, -}; -use chrono::{DateTime, NaiveDateTime, Utc}; -use thiserror::Error; - -#[derive(Error, Debug)] -pub enum OracleArrow2TransportError { - #[error(transparent)] - Source(#[from] OracleSourceError), - - #[error(transparent)] - Destination(#[from] Arrow2DestinationError), - - #[error(transparent)] - ConnectorX(#[from] crate::errors::ConnectorXError), -} - -pub struct OracleArrow2Transport; - -impl_transport!( - name = OracleArrow2Transport, - error = OracleArrow2TransportError, - systems = OracleTypeSystem => Arrow2TypeSystem, - route = OracleSource => Arrow2Destination, - mappings = { - { NumFloat[f64] => Float64[f64] | conversion auto } - { Float[f64] => Float64[f64] | conversion none } - { BinaryFloat[f64] => Float64[f64] | conversion none } - { BinaryDouble[f64] => Float64[f64] | conversion none } - { NumInt[i64] => Int64[i64] | conversion auto } - { Blob[Vec] => LargeBinary[Vec] | conversion auto } - { Clob[String] => LargeUtf8[String] | conversion none } - { VarChar[String] => LargeUtf8[String] | conversion auto } - { Char[String] => LargeUtf8[String] | conversion none } - { NVarChar[String] => LargeUtf8[String] | conversion none } - { NChar[String] => LargeUtf8[String] | conversion none } - { Date[NaiveDateTime] => Date64Micro[NaiveDateTimeWrapperMicro] | conversion option } - { Timestamp[NaiveDateTime] => Date64Micro[NaiveDateTimeWrapperMicro] | conversion none } - { TimestampNano[NaiveDateTime] => Date64[NaiveDateTime] | conversion auto } - { TimestampTz[DateTime] => DateTimeTzMicro[DateTimeWrapperMicro] | conversion option } - { TimestampTzNano[DateTime] => DateTimeTz[DateTime] | conversion auto } - } -); - -impl TypeConversion for OracleArrow2Transport { - fn convert(val: NaiveDateTime) -> NaiveDateTimeWrapperMicro { - NaiveDateTimeWrapperMicro(val) - } -} - -impl TypeConversion, DateTimeWrapperMicro> for OracleArrow2Transport { - fn convert(val: DateTime) -> DateTimeWrapperMicro { - DateTimeWrapperMicro(val) - } -} diff --git a/connectorx/src/transports/postgres_arrow.rs b/connectorx/src/transports/postgres_arrow.rs index 403a2c4e76..8e688d42f1 100644 --- a/connectorx/src/transports/postgres_arrow.rs +++ b/connectorx/src/transports/postgres_arrow.rs @@ -55,6 +55,7 @@ macro_rules! impl_postgres_transport { { BpChar[&'r str] => LargeUtf8[String] | conversion none } { VarChar[&'r str] => LargeUtf8[String] | conversion none } { Name[&'r str] => LargeUtf8[String] | conversion none } + { Enum[&'r str] => LargeUtf8[String] | conversion none } { Timestamp[NaiveDateTime] => Date64Micro[NaiveDateTimeWrapperMicro] | conversion option } { Date[NaiveDate] => Date32[NaiveDate] | conversion auto } { Time[NaiveTime] => Time64Micro[NaiveTimeWrapperMicro] | conversion option } diff --git a/connectorx/src/transports/postgres_arrow2.rs b/connectorx/src/transports/postgres_arrow2.rs deleted file mode 100644 index 492d0c74e4..0000000000 --- a/connectorx/src/transports/postgres_arrow2.rs +++ /dev/null @@ -1,140 +0,0 @@ -//! Transport from Postgres Source to Arrow2 Destination. - -use crate::destinations::arrow2::{ - typesystem::{ - Arrow2TypeSystem, DateTimeWrapperMicro, NaiveDateTimeWrapperMicro, NaiveTimeWrapperMicro, - }, - Arrow2Destination, Arrow2DestinationError, -}; -use crate::sources::postgres::{ - BinaryProtocol, CSVProtocol, CursorProtocol, PostgresSource, PostgresSourceError, - PostgresTypeSystem, SimpleProtocol, -}; -use crate::typesystem::TypeConversion; -use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; -use num_traits::ToPrimitive; -use postgres::NoTls; -use postgres_openssl::MakeTlsConnector; -use rust_decimal::Decimal; -use serde_json::Value; -use std::marker::PhantomData; -use thiserror::Error; -use uuid::Uuid; - -#[derive(Error, Debug)] -pub enum PostgresArrow2TransportError { - #[error(transparent)] - Source(#[from] PostgresSourceError), - - #[error(transparent)] - Destination(#[from] Arrow2DestinationError), - - #[error(transparent)] - ConnectorX(#[from] crate::errors::ConnectorXError), -} - -/// Convert Postgres data types to Arrow2 data types. -pub struct PostgresArrow2Transport(PhantomData

, PhantomData); - -macro_rules! impl_postgres_transport { - ($proto:ty, $tls:ty) => { - impl_transport!( - name = PostgresArrow2Transport<$proto, $tls>, - error = PostgresArrow2TransportError, - systems = PostgresTypeSystem => Arrow2TypeSystem, - route = PostgresSource<$proto, $tls> => Arrow2Destination, - mappings = { - { Float4[f32] => Float32[f32] | conversion auto } - { Float8[f64] => Float64[f64] | conversion auto } - { Numeric[Decimal] => Float64[f64] | conversion option } - { Int2[i16] => Int32[i32] | conversion auto } - { Int4[i32] => Int32[i32] | conversion auto } - { Int8[i64] => Int64[i64] | conversion auto } - { Bool[bool] => Boolean[bool] | conversion auto } - { Text[&'r str] => LargeUtf8[String] | conversion owned } - { BpChar[&'r str] => LargeUtf8[String] | conversion none } - { VarChar[&'r str] => LargeUtf8[String] | conversion none } - { Enum[&'r str] => LargeUtf8[String] | conversion none } - { Name[&'r str] => LargeUtf8[String] | conversion none } - { Timestamp[NaiveDateTime] => Date64Micro[NaiveDateTimeWrapperMicro] | conversion option } - { Date[NaiveDate] => Date32[NaiveDate] | conversion auto } - { Time[NaiveTime] => Time64Micro[NaiveTimeWrapperMicro] | conversion option } - { TimestampTz[DateTime] => DateTimeTzMicro[DateTimeWrapperMicro] | conversion option } - { UUID[Uuid] => LargeUtf8[String] | conversion option } - { Char[&'r str] => LargeUtf8[String] | conversion none } - { ByteA[Vec] => LargeBinary[Vec] | conversion auto } - { JSON[Value] => LargeUtf8[String] | conversion option } - { JSONB[Value] => LargeUtf8[String] | conversion none } - { BoolArray[Vec] => BoolArray[Vec] | conversion auto_vec } - { Int2Array[Vec] => Int64Array[Vec] | conversion auto_vec } - { Int4Array[Vec] => Int64Array[Vec] | conversion auto_vec } - { Int8Array[Vec] => Int64Array[Vec] | conversion auto } - { Float4Array[Vec] => Float64Array[Vec] | conversion auto_vec } - { Float8Array[Vec] => Float64Array[Vec] | conversion auto } - { NumericArray[Vec] => Float64Array[Vec] | conversion option } - { VarcharArray[Vec] => Utf8Array[Vec] | conversion none } - { TextArray[Vec] => Utf8Array[Vec] | conversion auto } - - } - ); - } -} - -impl_postgres_transport!(BinaryProtocol, NoTls); -impl_postgres_transport!(BinaryProtocol, MakeTlsConnector); -impl_postgres_transport!(CSVProtocol, NoTls); -impl_postgres_transport!(CSVProtocol, MakeTlsConnector); -impl_postgres_transport!(CursorProtocol, NoTls); -impl_postgres_transport!(CursorProtocol, MakeTlsConnector); -impl_postgres_transport!(SimpleProtocol, NoTls); -impl_postgres_transport!(SimpleProtocol, MakeTlsConnector); - -impl TypeConversion for PostgresArrow2Transport { - fn convert(val: NaiveTime) -> NaiveTimeWrapperMicro { - NaiveTimeWrapperMicro(val) - } -} - -impl TypeConversion - for PostgresArrow2Transport -{ - fn convert(val: NaiveDateTime) -> NaiveDateTimeWrapperMicro { - NaiveDateTimeWrapperMicro(val) - } -} - -impl TypeConversion, DateTimeWrapperMicro> for PostgresArrow2Transport { - fn convert(val: DateTime) -> DateTimeWrapperMicro { - DateTimeWrapperMicro(val) - } -} - -impl TypeConversion for PostgresArrow2Transport { - fn convert(val: Uuid) -> String { - val.to_string() - } -} - -impl TypeConversion for PostgresArrow2Transport { - fn convert(val: Decimal) -> f64 { - val.to_f64() - .unwrap_or_else(|| panic!("cannot convert decimal {:?} to float64", val)) - } -} - -impl TypeConversion, Vec> for PostgresArrow2Transport { - fn convert(val: Vec) -> Vec { - val.into_iter() - .map(|v| { - v.to_f64() - .unwrap_or_else(|| panic!("cannot convert decimal {:?} to float64", v)) - }) - .collect() - } -} - -impl TypeConversion for PostgresArrow2Transport { - fn convert(val: Value) -> String { - val.to_string() - } -} diff --git a/connectorx/src/transports/sqlite_arrow2.rs b/connectorx/src/transports/sqlite_arrow2.rs deleted file mode 100644 index caeb1edead..0000000000 --- a/connectorx/src/transports/sqlite_arrow2.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! Transport from SQLite Source to Arrow2 Destination. - -use crate::{ - destinations::arrow2::{ - typesystem::Arrow2TypeSystem, Arrow2Destination, Arrow2DestinationError, - }, - impl_transport, - sources::sqlite::{SQLiteSource, SQLiteSourceError, SQLiteTypeSystem}, - typesystem::TypeConversion, -}; -use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; -use thiserror::Error; - -#[derive(Error, Debug)] -pub enum SQLiteArrow2TransportError { - #[error(transparent)] - Source(#[from] SQLiteSourceError), - - #[error(transparent)] - Destination(#[from] Arrow2DestinationError), - - #[error(transparent)] - ConnectorX(#[from] crate::errors::ConnectorXError), -} - -/// Convert SQLite data types to Arrow2 data types. -pub struct SQLiteArrow2Transport; - -impl_transport!( - name = SQLiteArrow2Transport, - error = SQLiteArrow2TransportError, - systems = SQLiteTypeSystem => Arrow2TypeSystem, - route = SQLiteSource => Arrow2Destination, - mappings = { - { Bool[bool] => Boolean[bool] | conversion auto } - { Int8[i64] => Int64[i64] | conversion auto } - { Int4[i32] => Int64[i64] | conversion auto } - { Int2[i16] => Int64[i64] | conversion auto } - { Real[f64] => Float64[f64] | conversion auto } - { Text[Box] => LargeUtf8[String] | conversion option } - { Blob[Vec] => LargeBinary[Vec] | conversion auto } - { Date[NaiveDate] => Date32[NaiveDate] | conversion auto } - { Time[NaiveTime] => Time64[NaiveTime] | conversion auto } - { Timestamp[NaiveDateTime] => Date64[NaiveDateTime] | conversion auto } - } -); - -impl TypeConversion, String> for SQLiteArrow2Transport { - fn convert(val: Box) -> String { - val.to_string() - } -} diff --git a/connectorx/src/transports/trino_arrow2.rs b/connectorx/src/transports/trino_arrow2.rs deleted file mode 100644 index bc31fe6460..0000000000 --- a/connectorx/src/transports/trino_arrow2.rs +++ /dev/null @@ -1,64 +0,0 @@ -//! Transport from Trino Source to Arrow2 Destination. - -use crate::{ - destinations::arrow2::{ - typesystem::Arrow2TypeSystem, Arrow2Destination, Arrow2DestinationError, - }, - impl_transport, - sources::trino::{TrinoSource, TrinoSourceError, TrinoTypeSystem}, - typesystem::TypeConversion, -}; -use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; -use num_traits::ToPrimitive; -use rust_decimal::Decimal; -use serde_json::{to_string, Value}; -use thiserror::Error; - -#[derive(Error, Debug)] -pub enum TrinoArrow2TransportError { - #[error(transparent)] - Source(#[from] TrinoSourceError), - - #[error(transparent)] - Destination(#[from] Arrow2DestinationError), - - #[error(transparent)] - ConnectorX(#[from] crate::errors::ConnectorXError), -} - -/// Convert Trino data types to Arrow2 data types. -pub struct TrinoArrow2Transport(); - -impl_transport!( - name = TrinoArrow2Transport, - error = TrinoArrow2TransportError, - systems = TrinoTypeSystem => Arrow2TypeSystem, - route = TrinoSource => Arrow2Destination, - mappings = { - { Date[NaiveDate] => Date32[NaiveDate] | conversion auto } - { Time[NaiveTime] => Time64[NaiveTime] | conversion auto } - { Timestamp[NaiveDateTime] => Date64[NaiveDateTime] | conversion auto } - { Boolean[bool] => Boolean[bool] | conversion auto } - { Bigint[i32] => Int64[i64] | conversion auto } - { Integer[i32] => Int64[i64] | conversion none } - { Smallint[i16] => Int64[i64] | conversion auto } - { Tinyint[i8] => Int64[i64] | conversion auto } - { Double[f64] => Float64[f64] | conversion auto } - { Real[f32] => Float64[f64] | conversion auto } - { Varchar[String] => LargeUtf8[String] | conversion auto } - { Char[String] => LargeUtf8[String] | conversion none } - } -); - -impl TypeConversion for TrinoArrow2Transport { - fn convert(val: Decimal) -> f64 { - val.to_f64() - .unwrap_or_else(|| panic!("cannot convert decimal {:?} to float64", val)) - } -} - -impl TypeConversion for TrinoArrow2Transport { - fn convert(val: Value) -> String { - to_string(&val).unwrap() - } -} diff --git a/connectorx/tests/test_polars2.rs b/connectorx/tests/test_polars2.rs deleted file mode 100644 index 762a5c4162..0000000000 --- a/connectorx/tests/test_polars2.rs +++ /dev/null @@ -1,299 +0,0 @@ -use connectorx::{ - constants::RECORD_BATCH_SIZE, - destinations::arrow2::Arrow2Destination, - prelude::*, - sources::{ - dummy::{DummySource, DummyTypeSystem}, - postgres::{rewrite_tls_args, BinaryProtocol, PostgresSource}, - }, - sql::CXQuery, - transports::{DummyArrow2Transport, PostgresArrow2Transport}, -}; -use polars_old::{df, prelude::*}; -use postgres::NoTls; -use std::env; -use url::Url; - -#[test] -fn test_polars() { - let schema = [ - DummyTypeSystem::I64(true), - DummyTypeSystem::F64(true), - DummyTypeSystem::Bool(false), - DummyTypeSystem::String(true), - DummyTypeSystem::F64(false), - ]; - let nrows = vec![4, 7]; - let ncols = schema.len(); - let queries: Vec = nrows - .iter() - .map(|v| CXQuery::naked(format!("{},{}", v, ncols))) - .collect(); - let mut destination = Arrow2Destination::new(); - - let dispatcher = Dispatcher::<_, _, DummyArrow2Transport>::new( - DummySource::new(&["a", "b", "c", "d", "e"], &schema), - &mut destination, - &queries, - None, - ); - dispatcher.run().expect("run dispatcher"); - - let df: DataFrame = destination.polars().unwrap(); - let expected = df!( - "a" => &[0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6], - "b" => &[0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - "c" => &[true, false, true, false, true, false, true, false, true, false, true], - "d" => &["0", "1", "2", "3", "0", "1", "2", "3", "4", "5", "6"], - "e" => &[0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0] - ) - .unwrap(); - - // order of each batch is not guaranteed - let expected2 = df!( - "a" => &[0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3], - "b" => &[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.0, 1.0, 2.0, 3.0], - "c" => &[true, false, true, false, true, false, true, true, false, true, false], - "d" => &["0", "1", "2", "3", "4", "5", "6", "0", "1", "2", "3"], - "e" => &[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.0, 1.0, 2.0, 3.0] - ) - .unwrap(); - - assert!(df.frame_equal_missing(&expected) || df.frame_equal_missing(&expected2)); -} - -#[test] -fn test_polars_large() { - let schema = [ - DummyTypeSystem::I64(true), - DummyTypeSystem::F64(true), - DummyTypeSystem::Bool(false), - DummyTypeSystem::String(true), - DummyTypeSystem::F64(false), - ]; - let nrows = vec![RECORD_BATCH_SIZE * 2 - 1, RECORD_BATCH_SIZE * 2 + 10]; - let ncols = schema.len(); - let queries: Vec = nrows - .iter() - .map(|v| CXQuery::naked(format!("{},{}", v, ncols))) - .collect(); - let mut destination = Arrow2Destination::new(); - - let dispatcher = Dispatcher::<_, _, DummyArrow2Transport>::new( - DummySource::new(&["a", "b", "c", "d", "e"], &schema), - &mut destination, - &queries, - None, - ); - dispatcher.run().expect("run dispatcher"); - - let df: DataFrame = destination.polars().unwrap(); - assert_eq!(RECORD_BATCH_SIZE * 4 + 9, df.height()); - assert_eq!(5, df.width()); -} - -#[test] -fn test_postgres_arrow() { - let _ = env_logger::builder().is_test(true).try_init(); - - let dburl = env::var("POSTGRES_URL").unwrap(); - - let queries = [ - CXQuery::naked("select * from test_table where test_int < 2"), - CXQuery::naked("select * from test_table where test_int >= 2"), - ]; - let url = Url::parse(dburl.as_str()).unwrap(); - let (config, _tls) = rewrite_tls_args(&url).unwrap(); - let builder = PostgresSource::::new(config, NoTls, 2).unwrap(); - let mut destination = Arrow2Destination::new(); - let dispatcher = Dispatcher::<_, _, PostgresArrow2Transport>::new( - builder, - &mut destination, - &queries, - Some(format!("select * from test_table")), - ); - - dispatcher.run().expect("run dispatcher"); - - let df: DataFrame = destination.polars().unwrap(); - - let expected = df!( - "test_int" => &[1, 0, 2, 3, 4, 1314], - "test_nullint" => &[Some(3), Some(5), None, Some(7), Some(9), Some(2)], - "test_str" => &[Some("str1"), Some("a"), Some("str2"), Some("b"), Some("c"), None], - "test_float" => &[None, Some(3.1), Some(2.2), Some(3.), Some(7.8), Some(-10.)], - "test_bool" => &[Some(true), None, Some(false), Some(false), None, Some(true)] - ) - .unwrap(); - - let expected2 = df!( - "test_int" => &[2, 3, 4, 1314, 1, 0], - "test_nullint" => &[None, Some(7), Some(9), Some(2), Some(3), Some(5)], - "test_str" => &[Some("str2"), Some("b"), Some("c"), None, Some("str1"), Some("a")], - "test_float" => &[Some(2.2), Some(3.), Some(7.8), Some(-10.), None, Some(3.1)], - "test_bool" => &[Some(false), Some(false), None, Some(true), Some(true), None] - ) - .unwrap(); - - assert!(df.frame_equal_missing(&expected) || df.frame_equal_missing(&expected2)); -} - -#[test] -fn test_pg_pl_bool_array() { - let _ = env_logger::builder().is_test(true).try_init(); - - let dburl = env::var("POSTGRES_URL").unwrap(); - - let queries = [CXQuery::naked( - "select test_boolarray from test_types where test_boolarray is not null", - )]; - let url = Url::parse(dburl.as_str()).unwrap(); - let (config, _tls) = rewrite_tls_args(&url).unwrap(); - let builder = PostgresSource::::new(config, NoTls, 2).unwrap(); - let mut destination = Arrow2Destination::new(); - let dispatcher = Dispatcher::<_, _, PostgresArrow2Transport>::new( - builder, - &mut destination, - &queries, - Some(format!("select * from test_types")), - ); - - dispatcher.run().expect("run dispatcher"); - - let s1 = Series::new("a", [true, false]); - let empty_vec: Vec = vec![]; - let s2 = Series::new("b", empty_vec); - let s3 = Series::new("c", [true]); - - let df: DataFrame = destination.polars().unwrap(); - let test_df: DataFrame = df!( - "test_boolarray" => &[s1,s2,s3] - ) - .unwrap(); - - println!("{:?}", df); - assert_eq!(df, test_df); -} - -#[test] -fn test_pg_pl_varchar_array() { - let _ = env_logger::builder().is_test(true).try_init(); - - let dburl = env::var("POSTGRES_URL").unwrap(); - - let queries = [CXQuery::naked("select test_varchararray from test_types")]; - let url = Url::parse(dburl.as_str()).unwrap(); - let (config, _tls) = rewrite_tls_args(&url).unwrap(); - let builder = PostgresSource::::new(config, NoTls, 2).unwrap(); - let mut destination = Arrow2Destination::new(); - let dispatcher = Dispatcher::<_, _, PostgresArrow2Transport>::new( - builder, - &mut destination, - &queries, - Some(format!("select * from test_types")), - ); - - dispatcher.run().expect("run dispatcher"); - - let s1 = Series::new("a", ["str1", "str2"]); - let s2 = Series::new( - "b", - [ - "0123456789", - "abcdefghijklmnopqrstuvwxyz", - "!@#$%^&*()_-+=~`:;<>?/", - ], - ); - let s3 = Series::new("c", ["", " "]); - let empty_vec: Vec<&str> = vec![]; - let s4 = Series::new("d", empty_vec); - - let df: DataFrame = destination.polars().unwrap(); - let test_df: DataFrame = df!( - "test_varchararray" => &[s1,s2,s3,s4] - ) - .unwrap(); - - println!("{:?}", df); - // panic!("spurious"); - assert_eq!(df, test_df); -} - -#[test] -fn test_pg_pl_text_array() { - let _ = env_logger::builder().is_test(true).try_init(); - - let dburl = env::var("POSTGRES_URL").unwrap(); - - let queries = [CXQuery::naked("select test_textarray from test_types")]; - let url = Url::parse(dburl.as_str()).unwrap(); - let (config, _tls) = rewrite_tls_args(&url).unwrap(); - let builder = PostgresSource::::new(config, NoTls, 2).unwrap(); - let mut destination = Arrow2Destination::new(); - let dispatcher = Dispatcher::<_, _, PostgresArrow2Transport>::new( - builder, - &mut destination, - &queries, - Some(format!("select * from test_types")), - ); - - dispatcher.run().expect("run dispatcher"); - - let s1 = Series::new("a", ["text1", "text2"]); - let s2 = Series::new( - "b", - [ - "0123456789", - "abcdefghijklmnopqrstuvwxyz", - "!@#$%^&*()_-+=~`:;<>?/", - ], - ); - let s3 = Series::new("c", ["", " "]); - let empty_vec: Vec<&str> = vec![]; - let s4 = Series::new("d", empty_vec); - - let df: DataFrame = destination.polars().unwrap(); - let test_df: DataFrame = df!( - "test_textarray" => &[s1,s2,s3,s4] - ) - .unwrap(); - - println!("{:?}", df); - assert_eq!(df, test_df); -} - -#[test] - -fn test_pg_pl_name() { - let _ = env_logger::builder().is_test(true).try_init(); - - let dburl = env::var("POSTGRES_URL").unwrap(); - - let queries = [CXQuery::naked("select test_name from test_types")]; - let url = Url::parse(dburl.as_str()).unwrap(); - let (config, _tls) = rewrite_tls_args(&url).unwrap(); - let builder = PostgresSource::::new(config, NoTls, 2).unwrap(); - let mut destination = Arrow2Destination::new(); - let dispatcher = Dispatcher::<_, _, PostgresArrow2Transport>::new( - builder, - &mut destination, - &queries, - Some(format!("select * from test_types")), - ); - - dispatcher.run().expect("run dispatcher"); - - let s1 = "0"; - let s2 = "21"; - let s3 = "someName"; - let s4 = "101203203-1212323-22131235"; - - let df: DataFrame = destination.polars().unwrap(); - let test_df: DataFrame = df!( - "test_name" => &[s1,s2,s3,s4] - ) - .unwrap(); - - println!("{:?}", df); - assert_eq!(df, test_df); -} diff --git a/docs/api.md b/docs/api.md index 50fa4da597..d93e6f89fe 100644 --- a/docs/api.md +++ b/docs/api.md @@ -10,7 +10,7 @@ connectorx.read_sql(conn: Union[str, Dict[str, str]], query: Union[List[str], st - `conn: Union[str, Dict[str, str]]`: Connection string URI for querying single database or dict of database names (key) and connection string URIs (value) for querying multiple databases. - Please check out [here](https://sfu-db.github.io/connector-x/databases.html) for connection string examples of each database - `query: Union[str, List[str]]`: SQL query or list of partitioned SQL queries for fetching data. -- `return_type: str = "pandas"`: The return type of this function. It can be `arrow` (`arrow2`), `pandas`, `modin`, `dask` or `polars`. +- `return_type: str = "pandas"`: The return type of this function. It can be `arrow`, `pandas`, `modin`, `dask` or `polars`. - `protocol: str = "binary"`: The protocol used to fetch data from source, default is `binary`. Check out [here](./databases.md) to see more details. - `partition_on: Optional[str]`: The column to partition the result. - `partition_range: Optional[Tuple[int, int]]`: The value range of the partition column. diff --git a/docs/freq_questions.md b/docs/freq_questions.md index 71e9f0acfb..f5050c249b 100644 --- a/docs/freq_questions.md +++ b/docs/freq_questions.md @@ -32,6 +32,6 @@ For users who want to have pandas.DataFrame as final result. In order to avoid t ```Python import connectorx as cx -table = cx.read_sql(db_uri, query, return_type="arrow") # or arrow2 https://github.com/jorgecarleitao/arrow2 +table = cx.read_sql(db_uri, query, return_type="arrow") df = table.to_pandas(split_blocks=False, date_as_object=False) ``` \ No newline at end of file