Skip to content

Commit 599a7c9

Browse files
committed
add parquet
1 parent b2c2060 commit 599a7c9

File tree

6 files changed

+120
-10
lines changed

6 files changed

+120
-10
lines changed

Cargo.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,16 @@ rand = "0.7"
6666
serde_json = "1.0"
6767
streaming_algorithms = "0.3"
6868
tokio = { version = "0.2", features = ["macros", "time"] }
69+
70+
[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
6971
wasm-bindgen-test = "0.3"
7072

7173
[build-dependencies]
7274
rustversion = "1.0"
7375

76+
[patch.crates-io]
77+
lz4 = { git = "https://github.com/alecmocatta/lz4-rs", branch = "wasm" }
78+
7479
[[example]]
7580
name = "cloudfront_logs"
7681
required-features = ["aws"]
@@ -126,6 +131,10 @@ name = "parquet_dist"
126131
harness = false
127132
required-features = ["parquet"]
128133

134+
[[test]]
135+
name = "parquet_wasm"
136+
required-features = ["parquet"]
137+
129138
[[test]]
130139
name = "csv"
131140
required-features = ["csv"]
@@ -135,6 +144,10 @@ name = "csv_dist"
135144
harness = false
136145
required-features = ["csv"]
137146

147+
[[test]]
148+
name = "csv_wasm"
149+
required-features = ["csv"]
150+
138151
[[test]]
139152
name = "json"
140153
required-features = ["json"]

amadeus-parquet/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ serde_closure = "0.3"
3939
snap = "1.0"
4040
sum = "0.1"
4141
thrift = "0.13"
42-
zstd = "0.4"
42+
zstd = { version = "0.5", features = ["wasm"] }
4343

4444
[dev-dependencies]
4545
rand = "0.7"

amadeus-parquet/src/internal/util/hash_util.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,10 @@ pub fn hash<T: AsBytes>(data: &T, seed: u32) -> u32 {
2525
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2626
{
2727
if is_x86_feature_detected!("sse4.2") {
28-
unsafe { crc32_hash(data, seed) }
29-
} else {
30-
murmur_hash2_64a(data, seed as u64) as u32
28+
return unsafe { crc32_hash(data, seed) };
3129
}
3230
}
31+
murmur_hash2_64a(data, seed as u64) as u32
3332
}
3433

3534
const MURMUR_PRIME: u64 = 0xc6a4a7935bd1e995;

azure-pipelines.yml

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,33 @@ jobs:
7373
parameters:
7474
ordinal: 2
7575
endpoint: alecmocatta
76+
default:
77+
rust_toolchain: nightly
78+
rust_lint_toolchain: nightly-2020-07-12
79+
rust_flags: ''
80+
rust_packages: '-p amadeus-core -p amadeus-derive -p amadeus-parquet -p amadeus-serde -p amadeus-types -p amadeus'
81+
rust_features_clippy: ';parquet;csv;json;parquet csv json'
82+
rust_features: 'parquet csv json'
83+
rust_doc_features: 'parquet csv json'
84+
rust_target_check: ''
85+
rust_target_build: ''
86+
rust_target_run: ''
87+
matrix:
88+
mac:
89+
imageName: 'macos-latest'
90+
rust_target_run: 'wasm32-unknown-unknown'
91+
linux:
92+
imageName: 'ubuntu-latest'
93+
rust_target_run: 'wasm32-unknown-unknown'
94+
# TODO: re-enable the Windows job below once its headless browser driver stops failing ("driver status: exit code: 1")
95+
# windows:
96+
# imageName: 'windows-latest'
97+
# rust_target_run: 'wasm32-unknown-unknown'
98+
99+
- template: rust-n.yml@templates
100+
parameters:
101+
ordinal: 3
102+
endpoint: alecmocatta
76103
default:
77104
rust_toolchain: stable nightly
78105
rust_lint_toolchain: nightly-2020-07-12
@@ -91,6 +118,7 @@ jobs:
91118
linux:
92119
imageName: 'ubuntu-latest'
93120
rust_target_run: 'wasm32-unknown-unknown'
94-
windows:
95-
imageName: 'windows-latest'
96-
rust_target_run: 'wasm32-unknown-unknown'
121+
# TODO: re-enable the Windows job below once its headless browser driver stops failing ("driver status: exit code: 1")
122+
# windows:
123+
# imageName: 'windows-latest'
124+
# rust_target_run: 'wasm32-unknown-unknown'

tests/csv_wasm.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#![cfg(target_arch = "wasm32")]
12
#![allow(clippy::suspicious_map)]
23

34
use std::path::PathBuf;
@@ -26,7 +27,7 @@ async fn csv() {
2627
let timer = web_sys::window().unwrap().performance().unwrap();
2728
let start = timer.now();
2829

29-
let pool = ThreadPool::new(None).unwrap();
30+
let pool = &ThreadPool::new(None).unwrap();
3031

3132
#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
3233
struct GameDerived {
@@ -44,7 +45,7 @@ async fn csv() {
4445
assert_eq!(
4546
rows.par_stream()
4647
.map(|row: Result<_, _>| row.unwrap())
47-
.count(&pool)
48+
.count(pool)
4849
.await,
4950
100_000
5051
);
@@ -70,7 +71,7 @@ async fn csv() {
7071
let _: GameDerived2 = value.clone().downcast().unwrap();
7172
value
7273
})
73-
.count(&pool)
74+
.count(pool)
7475
.await,
7576
100_000
7677
);

tests/parquet_wasm.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#![cfg(target_arch = "wasm32")]
2+
#![allow(clippy::suspicious_map)]
3+
4+
use std::path::PathBuf;
5+
use wasm_bindgen::prelude::*;
6+
use wasm_bindgen_test::{wasm_bindgen_test, wasm_bindgen_test_configure};
7+
8+
use amadeus::prelude::*;
9+
10+
wasm_bindgen_test_configure!(run_in_browser);
11+
12+
// Imports a JavaScript function named `log` from the `console` namespace
// (i.e. `console.log`) so the test can write text to the browser console.
#[wasm_bindgen]
13+
extern "C" {
14+
#[wasm_bindgen(js_namespace = console)]
15+
fn log(s: &str);
16+
}
17+
// Local `print!` that shadows the standard macro for this file: the arguments
// are formatted into a `String` and handed to `log` instead of stdout.
macro_rules! print {
18+
($($t:tt)*) => (log(&format_args!($($t)*).to_string()));
19+
}
20+
// Local `println!` that shadows the standard macro for this file: appends
// "\n" to the format string and forwards everything to the local `print!`.
// The first arm handles a bare format string, the second one with arguments.
macro_rules! println {
21+
($fmt:expr) => (print!(concat!($fmt, "\n")));
22+
($fmt:expr, $($t:tt)*) => (print!(concat!($fmt, "\n"), $($t)*));
23+
}
24+
25+
/// Stub for the C `malloc` symbol: exported unmangled with the C ABI and
/// panics unconditionally if it is ever called.
///
/// NOTE(review): presumably exists only so that C code linked into the wasm32
/// test binary can resolve its allocator symbols — confirm against the
/// lz4/zstd dependencies.
#[no_mangle]
26+
pub extern "C" fn malloc(_size: usize) -> *mut std::ffi::c_void {
27+
panic!()
28+
}
29+
/// Stub for the C `free` symbol: exported unmangled with the C ABI and
/// panics unconditionally if it is ever called.
///
/// NOTE(review): presumably exists only so that C code linked into the wasm32
/// test binary can resolve its allocator symbols — confirm against the
/// lz4/zstd dependencies.
#[no_mangle]
30+
pub extern "C" fn free(_ptr: *mut std::ffi::c_void) {
31+
panic!()
32+
}
33+
/// Stub for the C `calloc` symbol: exported unmangled with the C ABI and
/// panics unconditionally if it is ever called.
///
/// NOTE(review): presumably exists only so that C code linked into the wasm32
/// test binary can resolve its allocator symbols — confirm against the
/// lz4/zstd dependencies.
#[no_mangle]
34+
pub extern "C" fn calloc(_nmemb: usize, _size: usize) -> *mut std::ffi::c_void {
35+
panic!()
36+
}
37+
/// Stub for the C `realloc` symbol: exported unmangled with the C ABI and
/// panics unconditionally if it is ever called.
///
/// NOTE(review): presumably exists only so that C code linked into the wasm32
/// test binary can resolve its allocator symbols — confirm against the
/// lz4/zstd dependencies.
#[no_mangle]
38+
pub extern "C" fn realloc(_ptr: *mut std::ffi::c_void, _size: usize) -> *mut std::ffi::c_void {
39+
panic!()
40+
}
41+
42+
// Browser test: opens eight `.snappy.parquet` files as `Value` rows, counts
// them through a `ThreadPool`, asserts the total is 207_535, and logs the
// elapsed time. Any failure along the way surfaces as a panic via `unwrap()`.
#[wasm_bindgen_test]
43+
async fn parquet() {
44+
// Start time taken from the browser window's `performance` object.
let timer = web_sys::window().unwrap().performance().unwrap();
45+
let start = timer.now();
46+
47+
// Held by reference so it can be passed straight to `.count(pool)` below.
let pool = &ThreadPool::new(None).unwrap();
48+
49+
// All eight files are combined into a single `Parquet` source.
let rows = Parquet::<_, Value>::new(vec![
50+
PathBuf::from("amadeus-testing/parquet/cf-accesslogs/year=2018/month=11/day=02/part-00176-17868f39-cd99-4b60-bb48-8daf9072122e.c000.snappy.parquet"),
51+
PathBuf::from("amadeus-testing/parquet/cf-accesslogs/year=2018/month=11/day=02/part-00176-ed461019-4a12-46fa-a3f3-246d58f0ee06.c000.snappy.parquet"),
52+
PathBuf::from("amadeus-testing/parquet/cf-accesslogs/year=2018/month=11/day=03/part-00137-17868f39-cd99-4b60-bb48-8daf9072122e.c000.snappy.parquet"),
53+
PathBuf::from("amadeus-testing/parquet/cf-accesslogs/year=2018/month=11/day=04/part-00173-17868f39-cd99-4b60-bb48-8daf9072122e.c000.snappy.parquet"),
54+
PathBuf::from("amadeus-testing/parquet/cf-accesslogs/year=2018/month=11/day=05/part-00025-17868f39-cd99-4b60-bb48-8daf9072122e.c000.snappy.parquet"),
55+
PathBuf::from("amadeus-testing/parquet/cf-accesslogs/year=2018/month=11/day=05/part-00025-96c249f4-3a10-4509-b6b8-693a5d90dbf3.c000.snappy.parquet"),
56+
PathBuf::from("amadeus-testing/parquet/cf-accesslogs/year=2018/month=11/day=06/part-00185-96c249f4-3a10-4509-b6b8-693a5d90dbf3.c000.snappy.parquet"),
57+
PathBuf::from("amadeus-testing/parquet/cf-accesslogs/year=2018/month=11/day=07/part-00151-96c249f4-3a10-4509-b6b8-693a5d90dbf3.c000.snappy.parquet"),
58+
]).await.unwrap();
59+
// Every row is unwrapped (a row-level error panics) before being counted.
assert_eq!(
60+
rows.par_stream()
61+
.map(|row: Result<_, _>| row.unwrap())
62+
.count(pool)
63+
.await,
64+
207_535
65+
);
66+
67+
// Printed with an "s" suffix after dividing by 1000.0 — presumably `now()`
// reports milliseconds; confirm against the `web_sys::Performance` docs.
let elapsed = timer.now() - start;
68+
println!("in {}s", elapsed / 1000.0);
69+
}

0 commit comments

Comments
 (0)