Skip to content

Commit fa13838

Browse files
authored
Merge pull request #435 from jpmorganchase/write-arrows
Write arrows
2 parents 8d30e73 + 7bd5e1d commit fa13838

File tree

16 files changed

+465
-172
lines changed

16 files changed

+465
-172
lines changed

cpp/perspective/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cmake_minimum_required (VERSION 2.8.11)
22
project (psp)
3-
set(CMAKE_CXX_STANDARD 11)
3+
set(CMAKE_CXX_STANDARD 14)
44
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
55
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/../../cmake/modules/" ${CMAKE_MODULE_PATH} )
66

cpp/perspective/src/cpp/base.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ namespace perspective {
1616

1717
void
1818
psp_abort() {
19+
std::cerr << "abort()" << std::endl;
1920
std::raise(SIGINT);
2021
}
2122

cpp/perspective/src/cpp/emscripten.cpp

Lines changed: 179 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ namespace binding {
208208
* val
209209
*/
210210
val
211-
scalar_to_val(const t_tscalar& scalar) {
211+
scalar_to_val(const t_tscalar& scalar, bool cast_double) {
212212
if (!scalar.is_valid()) {
213213
return val::null();
214214
}
@@ -223,7 +223,13 @@ namespace binding {
223223
case DTYPE_TIME:
224224
case DTYPE_FLOAT64:
225225
case DTYPE_FLOAT32: {
226-
return val(scalar.to_double());
226+
if (cast_double) {
227+
auto x = scalar.to_uint64();
228+
double y = *reinterpret_cast<double*>(&x);
229+
return val(y);
230+
} else {
231+
return val(scalar.to_double());
232+
}
227233
}
228234
case DTYPE_DATE: {
229235
return t_date_to_jsdate(scalar.get<t_date>()).call<val>("getTime");
@@ -274,6 +280,20 @@ namespace binding {
274280
scalar_vec_to(const std::vector<t_tscalar>& scalars, std::uint32_t idx) {
275281
return scalar_vec_to_val(scalars, idx);
276282
}
283+
284+
/**
285+
* Converts a std::vector<T> to a Typed Array, slicing directly from the
286+
* WebAssembly heap.
287+
*/
288+
template <typename T>
289+
val
290+
vector_to_typed_array(std::vector<T>& xs) {
291+
T* st = &xs[0];
292+
uintptr_t offset = reinterpret_cast<uintptr_t>(st);
293+
return val::module_property("HEAPU8").call<val>(
294+
"slice", offset, offset + (sizeof(T) * xs.size()));
295+
}
296+
277297
/**
278298
*
279299
*
@@ -359,66 +379,187 @@ namespace binding {
359379
val Int8Array = val::global("Int8Array");
360380
val Int16Array = val::global("Int16Array");
361381
val Int32Array = val::global("Int32Array");
382+
val UInt8Array = val::global("Uint8Array");
383+
val UInt32Array = val::global("Uint32Array");
362384
val Float32Array = val::global("Float32Array");
363385
val Float64Array = val::global("Float64Array");
364386
} // namespace js_typed_array
365387

388+
template <typename T>
389+
const val typed_array = val::null();
390+
391+
template <>
392+
const val typed_array<double> = js_typed_array::Float64Array;
393+
template <>
394+
const val typed_array<float> = js_typed_array::Float32Array;
395+
template <>
396+
const val typed_array<std::int8_t> = js_typed_array::Int8Array;
397+
template <>
398+
const val typed_array<std::int16_t> = js_typed_array::Int16Array;
399+
template <>
400+
const val typed_array<std::int32_t> = js_typed_array::Int32Array;
401+
template <>
402+
const val typed_array<std::uint32_t> = js_typed_array::UInt32Array;
403+
404+
template <typename F, typename T = F>
405+
T get_scalar(t_tscalar& t);
406+
407+
template <>
408+
double
409+
get_scalar<double>(t_tscalar& t) {
410+
return t.to_double();
411+
}
412+
template <>
413+
float
414+
get_scalar<float>(t_tscalar& t) {
415+
return t.to_double();
416+
}
417+
template <>
418+
std::int8_t
419+
get_scalar<std::int8_t>(t_tscalar& t) {
420+
return static_cast<std::int8_t>(t.to_int64());
421+
}
422+
template <>
423+
std::int16_t
424+
get_scalar<std::int16_t>(t_tscalar& t) {
425+
return static_cast<std::int16_t>(t.to_int64());
426+
}
427+
template <>
428+
std::int32_t
429+
get_scalar<std::int32_t>(t_tscalar& t) {
430+
return static_cast<std::int32_t>(t.to_int64());
431+
}
432+
template <>
433+
std::uint32_t
434+
get_scalar<std::uint32_t>(t_tscalar& t) {
435+
return static_cast<std::uint32_t>(t.to_int64());
436+
}
437+
template <>
438+
double
439+
get_scalar<t_date, double>(t_tscalar& t) {
440+
auto x = t.to_uint64();
441+
return *reinterpret_cast<double*>(&x);
442+
}
443+
444+
template <typename T, typename F = T, typename O = T>
445+
val
446+
col_to_typed_array(std::vector<t_tscalar> data, bool column_pivot_only) {
447+
int start_idx = column_pivot_only ? 1 : 0;
448+
int data_size = data.size() - start_idx;
449+
std::vector<T> vals;
450+
vals.reserve(data.size());
451+
int nullSize = ceil(data_size / 64.0) * 2;
452+
int nullCount = 0;
453+
std::vector<std::uint32_t> validityMap;
454+
validityMap.resize(nullSize);
455+
for (int idx = 0; idx < data.size() - start_idx; idx++) {
456+
t_tscalar scalar = data[idx + start_idx];
457+
if (scalar.is_valid() && scalar.get_dtype() != DTYPE_NONE) {
458+
vals.push_back(get_scalar<F, T>(scalar));
459+
validityMap[idx / 32] |= 1 << (idx % 32);
460+
} else {
461+
vals.push_back({});
462+
nullCount++;
463+
}
464+
}
465+
val arr = val::global("Array").new_();
466+
arr.call<void>("push", typed_array<O>.new_(vector_to_typed_array(vals)["buffer"]));
467+
arr.call<void>("push", nullCount);
468+
arr.call<void>("push", vector_to_typed_array(validityMap));
469+
return arr;
470+
}
471+
472+
template <>
473+
val
474+
col_to_typed_array<std::string>(std::vector<t_tscalar> data, bool column_pivot_only) {
475+
int start_idx = column_pivot_only ? 1 : 0;
476+
int data_size = data.size() - start_idx;
477+
478+
t_vocab vocab;
479+
vocab.init(false);
480+
481+
int nullSize = ceil(data_size / 64.0) * 2;
482+
int nullCount = 0;
483+
std::vector<std::uint32_t> validityMap; // = new std::uint32_t[nullSize];
484+
validityMap.resize(nullSize);
485+
val indexBuffer = js_typed_array::ArrayBuffer.new_(data_size * 4);
486+
val indexArray = js_typed_array::UInt32Array.new_(indexBuffer);
487+
488+
for (int idx = 0; idx < data.size(); idx++) {
489+
t_tscalar scalar = data[idx + start_idx];
490+
if (scalar.is_valid() && scalar.get_dtype() != DTYPE_NONE) {
491+
auto adx = vocab.get_interned(scalar.to_string());
492+
indexArray.call<void>("fill", val(adx), idx, idx + 1);
493+
validityMap[idx / 32] |= 1 << (idx % 32);
494+
} else {
495+
nullCount++;
496+
}
497+
}
498+
val dictBuffer = js_typed_array::ArrayBuffer.new_(
499+
vocab.get_vlendata()->size() - vocab.get_vlenidx());
500+
val dictArray = js_typed_array::UInt8Array.new_(dictBuffer);
501+
std::vector<std::uint32_t> offsets;
502+
offsets.reserve(vocab.get_vlenidx() + 1);
503+
std::uint32_t index = 0;
504+
for (auto i = 0; i < vocab.get_vlenidx(); i++) {
505+
const char* str = vocab.unintern_c(i);
506+
offsets.push_back(index);
507+
while (*str) {
508+
dictArray.call<void>("fill", val(*str++), index, index + 1);
509+
index++;
510+
}
511+
}
512+
offsets.push_back(index);
513+
514+
val arr = val::global("Array").new_();
515+
arr.call<void>("push", dictArray);
516+
arr.call<void>(
517+
"push", js_typed_array::UInt32Array.new_(vector_to_typed_array(offsets)["buffer"]));
518+
arr.call<void>("push", indexArray);
519+
arr.call<void>("push", nullCount);
520+
arr.call<void>("push", vector_to_typed_array(validityMap));
521+
return arr;
522+
}
523+
366524
// Given a column index, serialize data to TypedArray
367525
template <typename T>
368526
val
369-
col_to_js_typed_array(T ctx, t_index idx) {
527+
col_to_js_typed_array(T ctx, t_index idx, bool column_pivot_only) {
370528
std::vector<t_tscalar> data = ctx->get_data(0, ctx->get_row_count(), idx, idx + 1);
371529
auto dtype = ctx->get_column_dtype(idx);
372-
int data_size = data.size();
373-
val constructor = val::undefined();
374-
val sentinel = val::undefined();
375530

376531
switch (dtype) {
377532
case DTYPE_INT8: {
378-
data_size *= sizeof(std::int8_t);
379-
sentinel = val(std::numeric_limits<std::int8_t>::lowest());
380-
constructor = js_typed_array::Int8Array;
533+
return col_to_typed_array<std::int8_t>(data, column_pivot_only);
381534
} break;
382535
case DTYPE_INT16: {
383-
data_size *= sizeof(std::int16_t);
384-
sentinel = val(std::numeric_limits<std::int16_t>::lowest());
385-
constructor = js_typed_array::Int16Array;
536+
return col_to_typed_array<std::int16_t>(data, column_pivot_only);
537+
} break;
538+
case DTYPE_TIME: {
539+
return col_to_typed_array<double, t_date, std::int32_t>(
540+
data, column_pivot_only);
386541
} break;
387542
case DTYPE_INT32:
543+
case DTYPE_UINT32: {
544+
return col_to_typed_array<std::uint32_t>(data, column_pivot_only);
545+
} break;
388546
case DTYPE_INT64: {
389-
// scalar_to_val converts int64 into int32
390-
data_size *= sizeof(std::int32_t);
391-
sentinel = val(std::numeric_limits<std::int32_t>::lowest());
392-
constructor = js_typed_array::Int32Array;
547+
return col_to_typed_array<std::int32_t>(data, column_pivot_only);
393548
} break;
394549
case DTYPE_FLOAT32: {
395-
data_size *= sizeof(float);
396-
sentinel = val(std::numeric_limits<float>::lowest());
397-
constructor = js_typed_array::Float32Array;
550+
return col_to_typed_array<float>(data, column_pivot_only);
398551
} break;
399-
case DTYPE_TIME:
400552
case DTYPE_FLOAT64: {
401-
sentinel = val(std::numeric_limits<double>::lowest());
402-
data_size *= sizeof(double);
403-
constructor = js_typed_array::Float64Array;
553+
return col_to_typed_array<double>(data, column_pivot_only);
404554
} break;
405-
default:
406-
return constructor;
407-
}
408-
409-
val buffer = js_typed_array::ArrayBuffer.new_(data_size);
410-
val arr = constructor.new_(buffer);
411-
412-
for (int idx = 0; idx < data.size(); idx++) {
413-
t_tscalar scalar = data[idx];
414-
if (scalar.get_dtype() == DTYPE_NONE) {
415-
arr.call<void>("fill", sentinel, idx, idx + 1);
416-
} else {
417-
arr.call<void>("fill", scalar_to_val(scalar), idx, idx + 1);
555+
case DTYPE_STR: {
556+
return col_to_typed_array<std::string>(data, column_pivot_only);
557+
} break;
558+
default: {
559+
PSP_COMPLAIN_AND_ABORT("Unhandled aggregate type");
560+
return val::undefined();
418561
}
419562
}
420-
421-
return arr;
422563
}
423564

424565
void

cpp/perspective/src/include/perspective/emscripten.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ namespace binding {
4242
*/
4343
template <>
4444
emscripten::val scalar_to(const t_tscalar& scalar);
45-
emscripten::val scalar_to_val(const t_tscalar& scalar);
45+
emscripten::val scalar_to_val(const t_tscalar& scalar, bool cast_double = false);
4646

4747
template <>
4848
emscripten::val scalar_vec_to(const std::vector<t_tscalar>& scalars, std::uint32_t idx);

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"examples/*"
66
],
77
"devDependencies": {
8-
"@apache-arrow/es5-esm": "^0.3.1",
8+
"@apache-arrow/es5-esm": "^0.4.0",
99
"@babel/cli": "^7.2.3",
1010
"babel-eslint": "^8.2.3",
1111
"babel-jest": "^23.6.0",

packages/perspective-webpack-plugin/index.js

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,33 @@ class PerspectiveWebpackPlugin {
9090
loader: "babel-loader",
9191
options: BABEL_CONFIG
9292
});
93+
rules.push({
94+
test: /\.js$/,
95+
include: /node_modules[/\\](?!\@jpmorganchase)|psp\.(asmjs|async|sync)\.js|perspective\.(asmjs|wasm)\.worker\.js/,
96+
loader: "source-map-loader"
97+
});
9398
} else {
9499
rules.push({
95100
test: /\.js$/,
96-
include: this.options.load_path,
97-
exclude: /node_modules[/\\](?!\@jpmorganchase)|psp\.(asmjs|async|sync)\.js|perspective\.(asmjs|wasm)\.worker\.js/,
98101
loader: "source-map-loader"
99102
});
100103
}
104+
105+
// FIXME Workaround for performance regression in @apache-arrow 4.0
106+
rules.push({
107+
test: /\.js$/,
108+
include: /\@apache-arrow[/\\]es5-esm/,
109+
use: [
110+
{loader: "source-map-loader"},
111+
{
112+
loader: "string-replace-loader",
113+
options: {
114+
search: "BaseVector.prototype[Symbol.isConcatSpreadable] = true;",
115+
replace: ''
116+
}
117+
}
118+
]
119+
});
101120

102121
rules.push({
103122
test: /psp\.(sync|async)\.wasm\.js$/,

packages/perspective-webpack-plugin/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
"less": "^2.7.2",
4545
"less-loader": "^4.0.5",
4646
"source-map-loader": "^0.2.4",
47+
"string-replace-loader": "^2.1.1",
4748
"style-loader": "^0.18.2",
4849
"tslib": "^1.9.3",
4950
"worker-loader": "^2.0.0"

packages/perspective/bench/js/bench.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ const LIMIT = args.indexOf("--limit");
1616

1717
const multi_template = (xs, ...ys) => ys[0].map((y, i) => [y, xs.reduce((z, x, ix) => (ys[ix] ? z + x + ys[ix][i] : z + x), "")]);
1818

19-
const UNPKG_VERSIONS = ["0.2.11", "0.2.10", "0.2.9", "0.2.8", "0.2.7", "0.2.6", "0.2.5", "0.2.4", "0.2.3", "0.2.2", "0.2.1", "0.2.0"];
19+
const UNPKG_VERSIONS = ["0.2.15", "0.2.12", "0.2.11", "0.2.10", "0.2.9", "0.2.8", "0.2.7", "0.2.6", "0.2.5", "0.2.4", "0.2.3", "0.2.2", "0.2.1", "0.2.0"];
2020
const UNPKG_URLS = multi_template`https://unpkg.com/@jpmorganchase/perspective@${UNPKG_VERSIONS}/build/perspective.js`;
2121

2222
const OLD_FORMAT_UNPKG_VERSIONS = ["0.2.0-beta.3"];

packages/perspective/bench/js/browser_runtime.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ async function* run_all_cases() {
144144
yield* run_view_cases(table, config);
145145
yield* run_to_format_cases(table, config, "to_json");
146146
yield* run_to_format_cases(table, config, "to_columns");
147+
yield* run_to_format_cases(table, config, "to_arrow");
147148
}
148149
}
149150
}

packages/perspective/src/js/api.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ function view(worker, table_name, config) {
7676

7777
view.prototype.to_json = async_queue("to_json");
7878

79+
view.prototype.to_arrow = async_queue("to_arrow");
80+
7981
view.prototype.to_columns = async_queue("to_columns");
8082

8183
view.prototype.to_csv = async_queue("to_csv");

0 commit comments

Comments
 (0)