Skip to content

Commit 3926623

Browse files
authored
Merge pull request #1207 from finos/csv-arrow
Upgrade WebAssembly build to Arrow 1.0.1
2 parents 337ee8e + ec09c74 commit 3926623

File tree

24 files changed

+1269
-263
lines changed

24 files changed

+1269
-263
lines changed

cmake/arrow.txt.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ project(arrow-download NONE)
55
include(ExternalProject)
66
ExternalProject_Add(apachearrow
77
GIT_REPOSITORY https://github.com/apache/arrow.git
8-
GIT_TAG apache-arrow-0.16.0
8+
GIT_TAG apache-arrow-1.0.1
99
SOURCE_DIR "${CMAKE_BINARY_DIR}/arrow-src"
1010
BINARY_DIR "${CMAKE_BINARY_DIR}/arrow-build"
1111
CONFIGURE_COMMAND ""

cmake/arrow/CMakeLists.txt

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
set(CMAKE_SHARED_LIBRARY_SUFFIX .so)
22

33
set(ARROW_SRCS
4-
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array.cc
54
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/builder.cc
65
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/pretty_print.cc
6+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_base.cc
7+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_binary.cc
8+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_decimal.cc
9+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_dict.cc
10+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_nested.cc
11+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_primitive.cc
712
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_adaptive.cc
813
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_base.cc
914
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_binary.cc
@@ -13,11 +18,14 @@ set(ARROW_SRCS
1318
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_primitive.cc
1419
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_union.cc
1520
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/concatenate.cc
16-
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/dict_internal.cc
21+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/data.cc
1722
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/diff.cc
23+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/util.cc
1824
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/validate.cc
1925
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/buffer.cc
26+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/chunked_array.cc
2027
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compare.cc
28+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/device.cc
2129
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/extension_type.cc
2230
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/memory_pool.cc
2331
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/pretty_print.cc
@@ -34,9 +42,9 @@ set(ARROW_SRCS
3442
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/converter.cc
3543
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/chunker.cc
3644
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/column_builder.cc
45+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/column_decoder.cc
3746
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/options.cc
3847
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/parser.cc
39-
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/reader.cc
4048
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/filesystem.cc
4149
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/localfs.cc
4250
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/mockfs.cc
@@ -50,17 +58,22 @@ set(ARROW_SRCS
5058
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/json/reader.cc
5159
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/buffered.cc
5260
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/compressed.cc
53-
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/file.cc
5461
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/interfaces.cc
5562
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/memory.cc
5663
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/testing/util.cc
5764
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/basic_decimal.cc
65+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bit_block_counter.cc
5866
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bit_util.cc
67+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bitmap_builders.cc
68+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bitmap_ops.cc
5969
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/compression.cc
6070
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/cpu_info.cc
6171
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/decimal.cc
72+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/future.cc
73+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/delimiting.cc
6274
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/int_util.cc
6375
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/io_util.cc
76+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/iterator.cc
6477
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/logging.cc
6578
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/key_value_metadata.cc
6679
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/memory.cc
@@ -70,11 +83,17 @@ set(ARROW_SRCS
7083
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/thread_pool.cc
7184
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/trie.cc
7285
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/utf8.cc
86+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/value_parsing.cc
87+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/double-conversion.cc
88+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/cached-powers.cc
89+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/diy-fp.cc
90+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/bignum.cc
91+
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/strtod.cc
7392
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/datetime/tz.cpp
7493
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/dictionary.cc
7594
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/feather.cc
76-
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_integration.cc
77-
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_internal.cc
95+
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_integration.cc
96+
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_internal.cc
7897
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_simple.cc
7998
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/message.cc
8099
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/metadata_internal.cc

cmake/arrow/config.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
#define ARROW_VERSION_MAJOR 0
19-
#define ARROW_VERSION_MINOR 16
20-
#define ARROW_VERSION_PATCH 0
18+
#define ARROW_VERSION_MAJOR 1
19+
#define ARROW_VERSION_MINOR 0
20+
#define ARROW_VERSION_PATCH 1
2121
#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH
2222

2323
/* #undef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY */

cmake/modules/FindFlatbuffers.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ if(NOT ${FLATBUFFERS_INCLUDE_DIR})
4242
set(FLATBUFFERS_INCLUDE_DIR /usr/local/include)
4343
endif()
4444

45-
message("${FLATBUFFERS_COMPILER}")
4645
include(FindPackageHandleStandardArgs)
4746
find_package_handle_standard_args(FLATBUFFERS REQUIRED_VARS
4847
FLATBUFFERS_INCLUDE_DIR FLATBUFFERS_COMPILER)

cmake/modules/FindPyArrow.cmake

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ execute_process(
2424
"from __future__ import print_function\ntry: import pyarrow; print(' '.join(pyarrow.get_libraries()), end='')\nexcept:pass"
2525
OUTPUT_VARIABLE __pyarrow_libraries)
2626

27-
# And the version
27+
# And the version
2828
execute_process(
2929
COMMAND "${Python_EXECUTABLE}" -c
3030
"from __future__ import print_function\ntry: import pyarrow; print(pyarrow.__version__, end='')\nexcept:pass"
@@ -45,23 +45,38 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
4545
# windows its just "arrow.dll"
4646
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY "arrow_python")
4747
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY "arrow")
48-
set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
48+
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND ${PYARROW_VERSION_MAJOR} EQUAL "1")
49+
# Link against pre-built libarrow on MacOS
50+
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.100.dylib)
51+
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.100.dylib)
4952
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
5053
# Link against pre-built libarrow on MacOS
5154
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.${PYARROW_VERSION_MINOR}.dylib)
5255
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.${PYARROW_VERSION_MINOR}.dylib)
53-
set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
54-
else()
56+
elseif (${PYARROW_VERSION_MAJOR} EQUAL "1")
5557
# linux
56-
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
57-
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
58-
set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
58+
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.100)
59+
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.100)
60+
else()
61+
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
62+
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
5963
endif()
6064

65+
set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
66+
6167
if(PYTHON_PYARROW_INCLUDE_DIR AND PYTHON_PYARROW_LIBRARIES)
6268
set(PYTHON_PYARROW_FOUND 1 CACHE INTERNAL "Python pyarrow found")
6369
endif()
6470

71+
72+
# set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
73+
# else()
74+
# # linux
75+
# set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
76+
# set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
77+
78+
79+
6580
include(FindPackageHandleStandardArgs)
6681
find_package_handle_standard_args(PyArrow REQUIRED_VARS PYTHON_PYARROW_INCLUDE_DIR PYTHON_PYARROW_LIBRARIES PYTHON_PYARROW_LIBRARY_DIR
6782
VERSION_VAR __pyarrow_version)

cpp/perspective/CMakeLists.txt

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,13 @@ set (SOURCE_FILES
548548

549549
set(PYTHON_SOURCE_FILES ${SOURCE_FILES}
550550
${PSP_PYTHON_SRC}/src/column.cpp
551-
)
551+
)
552+
553+
set(WASM_SOURCE_FILES ${SOURCE_FILES}
554+
${PSP_CPP_SRC}/src/cpp/arrow_csv.cpp
555+
${PSP_CPP_SRC}/src/cpp/vendor/arrow_single_threaded_reader.cpp
556+
)
557+
552558

553559
set (PYTHON_BINDING_SOURCE_FILES
554560
${PSP_PYTHON_SRC}/src/accessor.cpp
@@ -570,7 +576,7 @@ else()
570576
endif()
571577

572578
if (PSP_WASM_BUILD)
573-
add_library(psp ${SOURCE_FILES})
579+
add_library(psp ${WASM_SOURCE_FILES})
574580
target_compile_definitions(psp PRIVATE PSP_ENABLE_WASM=1)
575581
set_target_properties(psp PROPERTIES COMPILE_FLAGS "${ASYNC_MODE_FLAGS}")
576582
target_link_libraries(psp arrow)
@@ -663,7 +669,7 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD)
663669
endif()
664670
########################
665671
else()
666-
add_library(psp SHARED ${SOURCE_FILES})
672+
add_library(psp SHARED ${WASM_SOURCE_FILES})
667673

668674
# Link perspective against custom-built minimal arrow
669675
target_link_libraries(psp arrow)
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/******************************************************************************
2+
*
3+
* Copyright (c) 2019, the Perspective Authors.
4+
*
5+
* This file is part of the Perspective library, distributed under the terms of
6+
* the Apache License 2.0. The full license can be found in the LICENSE file.
7+
*
8+
*/
9+
10+
#include <perspective/base.h>
11+
#include <perspective/arrow_csv.h>
12+
#include <arrow/util/value_parsing.h>
13+
#include <arrow/io/memory.h>
14+
15+
// This causes build warnings
16+
// https://github.com/emscripten-core/emscripten/issues/8574
17+
#include <perspective/vendor/arrow_single_threaded_reader.h>
18+
19+
namespace perspective {
20+
namespace apachearrow {
21+
22+
std::shared_ptr<::arrow::Table>
23+
csvToTable(std::string& csv, bool is_update,
24+
std::unordered_map<std::string, std::shared_ptr<arrow::DataType>>&
25+
schema) {
26+
arrow::MemoryPool* pool = arrow::default_memory_pool();
27+
auto input = std::make_shared<arrow::io::BufferReader>(csv);
28+
auto read_options = arrow::csv::ReadOptions::Defaults();
29+
auto parse_options = arrow::csv::ParseOptions::Defaults();
30+
auto convert_options = arrow::csv::ConvertOptions::Defaults();
31+
32+
read_options.use_threads = false;
33+
convert_options.timestamp_parsers
34+
= std::vector<std::shared_ptr<arrow::TimestampParser>>{
35+
arrow::TimestampParser::MakeISO8601(),
36+
arrow::TimestampParser::MakeStrptime("%Y-%m-%d\\D%H:%M:%S.%f"),
37+
arrow::TimestampParser::MakeStrptime("%m-%d-%Y"),
38+
arrow::TimestampParser::MakeStrptime("%m/%d/%Y"),
39+
arrow::TimestampParser::MakeStrptime("%d %m %Y"),
40+
arrow::TimestampParser::MakeStrptime("%H:%M:%S.%f"),
41+
};
42+
43+
if (is_update) {
44+
convert_options.column_types = std::move(schema);
45+
}
46+
47+
auto maybe_reader = arrow::csv::TableReader::Make(
48+
pool, input, read_options, parse_options, convert_options);
49+
50+
std::shared_ptr<arrow::csv::TableReader> reader = *maybe_reader;
51+
52+
auto maybe_table = reader->Read();
53+
if (!maybe_table.ok()) {
54+
PSP_COMPLAIN_AND_ABORT(maybe_table.status().ToString());
55+
}
56+
return *maybe_table;
57+
}
58+
59+
} // namespace apachearrow
60+
} // namespace perspective

0 commit comments

Comments
 (0)