Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Add basic Python API for HDK #20

Merged
merged 7 commits into from
May 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@ build/*
# Geospatial Artifacts copied to make Geospatial module build
ThirdParty/geo_samples
ThirdParty/gdal-data

# Cython generated files
python/pyhdk/*.cpp
98 changes: 95 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,30 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
"Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif ()

# Conda integration: when CONDA_PREFIX is present in the environment at
# configure time, dependencies are searched for in that prefix and the
# install prefix is pointed at it. ENABLE_CONDA records whether this mode
# is active.
set(ENABLE_CONDA OFF)
if(DEFINED ENV{CONDA_PREFIX})
  set(ENABLE_CONDA ON)
  # Only override the sysroot when conda actually provides one. Assigning an
  # undefined CONDA_BUILD_SYSROOT would blank out a sysroot supplied by the
  # user or by a toolchain file.
  if(DEFINED ENV{CONDA_BUILD_SYSROOT})
    set(CMAKE_SYSROOT "$ENV{CONDA_BUILD_SYSROOT}")
  endif()
  list(APPEND CMAKE_PREFIX_PATH "$ENV{CONDA_PREFIX}")
  set(CMAKE_INSTALL_PREFIX "$ENV{CONDA_PREFIX}")
endif()

# User-facing build switches; both default to ON.
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
option(ENABLE_PYTHON "Build Python libraries" ON)

if(BUILD_SHARED_LIBS)
  # Without CMAKE_INSTALL_RPATH_USE_LINK_PATH every installed shared object
  # would end up with an empty rpath, which breaks linking against libjvm.so.
  set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
  add_definitions("-DENABLE_SHARED_LIBS")
endif()

# Stage the geospatial data shipped with the OmniSciDB submodule into the
# top-level ThirdParty directory first, so that the copy of ThirdParty into
# the build directory below already contains it on the very first configure
# run rather than only on subsequent ones.
file(COPY "${CMAKE_SOURCE_DIR}/omniscidb/ThirdParty/geo_samples" DESTINATION "${CMAKE_SOURCE_DIR}/ThirdParty")
file(COPY "${CMAKE_SOURCE_DIR}/omniscidb/ThirdParty/gdal-data" DESTINATION "${CMAKE_SOURCE_DIR}/ThirdParty")

# Copy ThirdParty to the build dir so OmniSciDB dependencies can be copied over.
# Note that third_party is available internally for HDK-specific dependencies.
file(COPY "${CMAKE_SOURCE_DIR}/ThirdParty" DESTINATION "${CMAKE_BINARY_DIR}/")

# External Dependencies
# Extend the CMake module search path with HDK's own modules, followed by
# the modules shipped with the OmniSciDB submodule.
list(APPEND CMAKE_MODULE_PATH
  "${CMAKE_SOURCE_DIR}/cmake_modules"
  "${CMAKE_SOURCE_DIR}/omniscidb/cmake/Modules")

# Google log (copy bundled under third_party)
add_subdirectory(third_party/glog-0.5.0)
Expand All @@ -39,28 +56,103 @@ find_package(Arrow REQUIRED)
add_definitions("-DARROW_NO_DEPRECATED_API")
include_directories(${Arrow_INCLUDE_DIRS})

# Parquet
find_package(Parquet REQUIRED)

# Boost, required for OmniSciDB
add_definitions("-DBOOST_LOG_DYN_LINK") # dyn linking only
find_package(Boost REQUIRED COMPONENTS
  log
  log_setup
  filesystem
  program_options
  regex
  system
  thread
  timer
  locale
  iostreams)
include_directories(${Boost_INCLUDE_DIR})

# TBB: locate the package, then publish the TBB-related compile definitions
# in a single call (same definitions, same order).
find_package(TBB REQUIRED)
add_definitions(
  "-DENABLE_TBB"
  "-DHAVE_TBB"
  "-DTBB_PREVIEW_TASK_GROUP_EXTENSIONS=1")

# LLVM
# Produces two variables for later use: `llvm_libs` and `clang_libs`.
find_library(CLANG_LIB clang-cpp)
find_library(LLVM_LIB LLVM)
# Deps builds use separate libs for each clang component, while some distros
# now bundle everything into a single lib.
if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin" OR NOT LLVM_LIB)
  # Per-component libraries: used on Darwin, or when no monolithic LLVM
  # library was found.
  set(LLVM_COMPONENTS support mcjit core irreader option linker)

  if(ENABLE_INTEL_JIT_LISTENER)
    list(APPEND LLVM_COMPONENTS inteljitevents)
  endif()

  llvm_map_components_to_libnames(llvm_libs ${LLVM_TARGETS_TO_BUILD} ${LLVM_COMPONENTS})
  set(clang_libs
    clangFrontend
    clangSerialization
    clangDriver
    clangTooling
    clangParse
    clangSema
    clangAnalysis
    clangEdit
    clangAST
    clangLex
    clangBasic
    clangRewrite
    clangRewriteFrontend)

  # LLVMSupport explicitly lists tinfo in its INTERFACE_LINK_LIBRARIES, even
  # though we provide it in our build of ncurses. Since LLVMSupport is listed
  # as a requirement for other llvm libs, we need to walk through the entire
  # list in order to remove all instances of tinfo.
  foreach(lib ${llvm_libs})
    get_target_property(interface_libs ${lib} INTERFACE_LINK_LIBRARIES)
    # get_target_property() yields "<var>-NOTFOUND" when the property is not
    # set. Skip such targets so the placeholder is never written back as a
    # link dependency.
    if(interface_libs)
      list(REMOVE_ITEM interface_libs tinfo z rt pthread -lpthread m dl)
      set_target_properties(${lib} PROPERTIES INTERFACE_LINK_LIBRARIES "${interface_libs}")
    endif()
  endforeach()

  list(APPEND llvm_libs ${CURSES_NCURSES_LIBRARY})
else()
  # Monolithic libraries: a single LLVM library was found, so a single
  # clang-cpp library is required as well.
  if(NOT CLANG_LIB)
    message(FATAL_ERROR "Could not find CLANG library.")
  endif()

  set(clang_libs ${CLANG_LIB})
  set(llvm_libs ${LLVM_LIB})
endif()

# OmniSciDB submodule
# Headers are resolved relative to the submodule root; its components are
# then added one by one in a fixed order.
include_directories(${CMAKE_SOURCE_DIR}/omniscidb)

foreach(hdk_omnisci_component Shared OSDependent)
  add_subdirectory(omniscidb/${hdk_omnisci_component})
endforeach()

# TODO: replace with glog
include_directories(omniscidb/ThirdParty/rapidjson)
add_definitions(-DRAPIDJSON_HAS_STDSTRING)
include_directories(omniscidb/ThirdParty/googletest)
add_subdirectory(omniscidb/ThirdParty/googletest)

# TODO: replace with glog
add_subdirectory(omniscidb/Logger)

# Remaining components, in the order they must be added.
foreach(hdk_omnisci_component
    Utils
    Calcite
    SchemaMgr
    StringDictionary
    L0Mgr
    CudaMgr
    DataMgr
    ArrowStorage
    Analyzer
    SqliteConnector
    QueryEngine)
  add_subdirectory(omniscidb/${hdk_omnisci_component})
endforeach()

# Source
add_subdirectory(src)

# The Python package is only configured for shared-library builds that have
# ENABLE_PYTHON switched on.
if(ENABLE_PYTHON AND BUILD_SHARED_LIBS)
  add_subdirectory(python)
endif()

# Install rule for the OmniSciDB component targets.
install(
  TARGETS
    OSDependent
    Logger
    Shared
    Utils
    Calcite
    ArrowStorage
    StringDictionary
    DataMgr
    CudaMgr
    SchemaMgr
    L0Mgr
    QueryEngine
    Analyzer
    SqliteConnector
  RUNTIME)

# Test driver executable, linked against the HDK target.
add_executable(TestDriver apps/TestDriver.cpp)

target_link_libraries(TestDriver PRIVATE HDK)
Expand Down
2 changes: 1 addition & 1 deletion omniscidb
31 changes: 31 additions & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Build and install rules for the pyhdk Python package. The extension
# modules are produced by running the generated setup.py with `build_ext`.

# setup.py is generated into the build tree from the setup.py.in template.
set(SETUP_PY "${CMAKE_CURRENT_BINARY_DIR}/setup.py")

configure_file("${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in" "${SETUP_PY}")

find_package(PythonInterp REQUIRED)

# Python and Cython inputs of the package; each variable name matches the
# file extension it collects.
file(GLOB_RECURSE PY_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} pyhdk/*.py)
file(GLOB_RECURSE PYX_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} pyhdk/*.pyx)
file(GLOB_RECURSE PXD_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} pyhdk/*.pxd)

# Everything the pyhdk targets depend on: the setup.py template, the package
# sources, and the Calcite target.
set(pydeps
  ${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
  ${PY_SOURCES}
  ${PYX_SOURCES}
  ${PXD_SOURCES}
  Calcite)

# Library search paths handed to setup.py through the LDFLAGS environment
# variable, and the flags passed to `build_ext`.
set(SETUP_LDFLAGS "-L$<TARGET_FILE_DIR:Calcite> -L$<TARGET_FILE_DIR:ArrowStorage> -L$<TARGET_FILE_DIR:QueryEngine> -L$<TARGET_FILE_DIR:SchemaMgr>")
set(SETUP_FLAGS -g -f -I ${CMAKE_CURRENT_SOURCE_DIR})

# Builds the extension modules as part of the default build. The working
# directory and the environment are set through CMake itself (instead of
# `cd ... && VAR=... cmd`), so the command does not rely on shell syntax, and
# VERBATIM keeps the space-separated LDFLAGS value as one argument.
add_custom_target(pyhdk ALL
  COMMAND ${CMAKE_COMMAND} -E env "LDFLAGS=${SETUP_LDFLAGS}"
          ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext ${SETUP_FLAGS}
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ${pydeps}
  VERBATIM
)

# Same as `pyhdk`, but additionally runs the setup.py `install` step. Not
# part of the default build.
add_custom_target(pyhdk-install
  COMMAND ${CMAKE_COMMAND} -E env "LDFLAGS=${SETUP_LDFLAGS}"
          ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext ${SETUP_FLAGS} install
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ${pydeps}
  VERBATIM
)

# Hook the Python install step into the regular install step. The flags list
# is flattened to a space-separated string because it is embedded in a
# `bash -c` command line.
string(REPLACE ";" " " SETUP_FLAGS_STR "${SETUP_FLAGS}")
install(CODE "execute_process(WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND bash -c \"LDFLAGS='${SETUP_LDFLAGS}' ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext ${SETUP_FLAGS_STR} install\")")
33 changes: 33 additions & 0 deletions python/pyhdk/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#
# Copyright 2022 Intel Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import os

# We set these dlopen flags to allow calls from JIT code
# to HDK shared objects. Otherwise, such relocations would
# be unresolved and we would end up with calls to a zero address.
# TODO: Is there a way to avoid this in the Python code?
if sys.platform == "linux":
    prev = sys.getdlopenflags()
    sys.setdlopenflags(os.RTLD_LAZY | os.RTLD_GLOBAL)

try:
    from pyhdk._common import TypeInfo, SQLType, setGlobalConfig
    from pyhdk._execute import Executor
    import pyhdk.sql as sql
    import pyhdk.storage as storage
finally:
    # Restore the interpreter-wide dlopen flags even when one of the imports
    # above fails, so the temporary override never leaks to other modules.
    if sys.platform == "linux":
        sys.setdlopenflags(prev)
94 changes: 94 additions & 0 deletions python/pyhdk/_common.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#
# Copyright 2022 Intel Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from libcpp cimport bool
from libcpp.string cimport string

# C++ declarations taken from omniscidb/Shared/sqltypes.h. Each entity is
# exposed to Cython under a C-prefixed name; the quoted string after that
# name is the identifier used on the C++ side.
cdef extern from "omniscidb/Shared/sqltypes.h":
# Cython view of the C++ `SQLTypes` enum (values 0 through 24 as listed).
enum CSQLTypes "SQLTypes":
kNULLT = 0,
kBOOLEAN = 1,
kCHAR = 2,
kVARCHAR = 3,
kNUMERIC = 4,
kDECIMAL = 5,
kINT = 6,
kSMALLINT = 7,
kFLOAT = 8,
kDOUBLE = 9,
kTIME = 10,
kTIMESTAMP = 11,
kBIGINT = 12,
kTEXT = 13,
kDATE = 14,
kARRAY = 15,
kINTERVAL_DAY_TIME = 16,
kINTERVAL_YEAR_MONTH = 17,
kTINYINT = 18,
kEVAL_CONTEXT_TYPE = 19,
kVOID = 20,
kCURSOR = 21,
kCOLUMN = 22,
kCOLUMN_LIST = 23,
kSQLTYPE_LAST = 24,

# Cython view of the C++ `EncodingType` enum. Note that no enumerator is
# declared here for the value 6.
enum CEncodingType "EncodingType":
kENCODING_NONE = 0,
kENCODING_FIXED = 1,
kENCODING_RL = 2,
kENCODING_DIFF = 3,
kENCODING_DICT = 4,
kENCODING_SPARSE = 5,
kENCODING_DATE_IN_DAYS = 7,
kENCODING_LAST = 8,

# Cython view of the C++ `SQLTypeInfo` class: constructor overloads first,
# then the get_* accessors and toString().
# NOTE(review): the one-letter constructor parameters presumably stand for
# type (t), dimension (d), scale (s), notnull (n), compression (c),
# comp_param (p) and subtype (st) - confirm against sqltypes.h.
cdef cppclass CSQLTypeInfo "SQLTypeInfo":
CSQLTypeInfo(CSQLTypes t, int d, int s, bool n, CEncodingType c, int p, CSQLTypes st)
CSQLTypeInfo(CSQLTypes t, int d, int s, bool n)
CSQLTypeInfo(CSQLTypes t, CEncodingType c, int p, CSQLTypes st)
CSQLTypeInfo(CSQLTypes t, int d, int s)
CSQLTypeInfo(CSQLTypes t, bool n)
CSQLTypeInfo(CSQLTypes t)
CSQLTypeInfo(CSQLTypes t, bool n, CEncodingType c)
CSQLTypeInfo()

CSQLTypes get_type()
CSQLTypes get_subtype()
int get_dimension()
int get_precision()
int get_input_srid()
int get_scale()
int get_output_srid()
bool get_notnull()
CEncodingType get_compression()
int get_comp_param()
int get_size()
int get_logical_size()

string toString()

# Extension type that stores a C++ SQLTypeInfo value in its `c_type_info`
# attribute.
cdef class TypeInfo:
cdef CSQLTypeInfo c_type_info

# Declares the C++ `SystemParameters` class (as `CSystemParameters`) from
# omniscidb/Shared/SystemParameters.h; only its default constructor is made
# visible to Cython here.
cdef extern from "omniscidb/Shared/SystemParameters.h":
cdef cppclass CSystemParameters "SystemParameters":
CSystemParameters()

# Global boolean variables made visible to Cython through
# omniscidb/ThriftHandler/CommandLineOptions.h.
# NOTE(review): presumably these are the engine-wide configuration flags
# that the Python-side configuration helpers assign - confirm against the
# corresponding .pyx implementation.
cdef extern from "omniscidb/ThriftHandler/CommandLineOptions.h":
cdef bool g_enable_columnar_output
cdef bool g_enable_union
cdef bool g_enable_lazy_fetch
cdef bool g_null_div_by_zero
cdef bool g_enable_dynamic_watchdog
Loading