Skip to content

Try import pyarrow and pandas when necessary #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
name: Build

on: [push, pull_request]
on:
push:
tags:
- 'v*'
pull_request:
branches:
- pybind

jobs:
build_wheels_linux:
Expand Down Expand Up @@ -82,6 +88,7 @@ jobs:
export CC=/usr/bin/clang
export CXX=/usr/bin/clang++
bash ./chdb/build.sh
python3 -m pip install pandas pyarrow
bash -x ./chdb/test_smoke.sh
continue-on-error: false
- name: Check ccache statistics
Expand Down Expand Up @@ -148,7 +155,6 @@ jobs:
run: |
pwd
uname -a
export HOMEBREW_NO_AUTO_UPDATE=1
export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
brew install git ccache ninja libtool gettext llvm@15 gcc binutils grep findutils zstd
export PATH=$(brew --prefix llvm@15)/bin:$PATH
Expand Down Expand Up @@ -193,12 +199,13 @@ jobs:
export CXX=$(brew --prefix llvm@15)/bin/clang++
bash gen_manifest.sh
bash ./chdb/build.sh
python3 -m pip install pandas pyarrow
bash -x ./chdb/test_smoke.sh
continue-on-error: false
- name: Keep killall ccache and wait for ccache to finish
if: always()
run: |
sleep 300
sleep 60
while ps -ef | grep ccache | grep -v grep; do \
killall ccache; \
sleep 10; \
Expand Down Expand Up @@ -261,7 +268,6 @@ jobs:
run: |
pwd
uname -a
export HOMEBREW_NO_AUTO_UPDATE=1
export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
brew install git ccache ninja libtool gettext llvm@15 gcc binutils grep findutils zstd
export PATH=$(brew --prefix llvm@15)/bin:$PATH
Expand Down Expand Up @@ -349,7 +355,7 @@ jobs:
- name: Keep killall ccache and wait for ccache to finish
if: always()
run: |
sleep 300
sleep 60
while ps -ef | grep ccache | grep -v grep; do \
killall ccache; \
sleep 10; \
Expand Down
30 changes: 21 additions & 9 deletions chdb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import sys
import os
import pyarrow as pa

chdb_version = (0, 1, 0)
chdb_version = (0, 5, 0)
if sys.version_info[:2] >= (3, 7):
# get the path of the current file
current_path = os.path.dirname(os.path.abspath(__file__))
Expand All @@ -23,19 +22,32 @@
except: # pragma: no cover
__version__ = "unknown"


def _to_arrowTable(res):
# return pyarrow table
def to_arrowTable(res):
    """Convert a chdb query result into a pyarrow Table.

    pyarrow and pandas are imported lazily inside this function so that
    importing the package does not require either of them.

    Args:
        res: Query result object exposing ``get_memview()``; the returned
            buffer is handed to ``pyarrow.RecordBatchFileReader``.

    Returns:
        The table returned by ``RecordBatchFileReader(...).read_all()``.

    Raises:
        ImportError: If pyarrow or pandas cannot be imported. The message
            carries the install hint and the original error is chained as
            ``__cause__``.
    """
    try:
        import pyarrow as pa
        import pandas  # noqa: F401 -- imported only to fail early with the hint
    except ImportError as e:
        # Library code should not write to stdout (callers may be piping
        # query output), so the hint travels with the exception instead.
        raise ImportError(
            f'Failed to import pyarrow or pandas: {e}. '
            'Please install pyarrow and pandas via "pip install pyarrow pandas"'
        ) from e

    return pa.RecordBatchFileReader(res.get_memview()).read_all()

# return pandas dataframe
def to_df(r):
    """Convert a chdb query result into a pandas DataFrame.

    Args:
        r: Query result object accepted by ``to_arrowTable``.

    Returns:
        The value of ``to_pandas(use_threads=True)`` called on the Arrow
        table built from ``r``.
    """
    t = to_arrowTable(r)
    return t.to_pandas(use_threads=True)

# wrap _chdb functions
def query(sql, output_format="CSV", **kwargs):
    """Run ``sql`` through ``_chdb.query`` and return it in the requested form.

    ``output_format`` is compared case-insensitively against two special
    values; anything else is forwarded to ``_chdb.query`` unchanged.

    Args:
        sql: Query text passed to ``_chdb.query``.
        output_format: ``"dataframe"`` returns the result of ``to_df``,
            ``"arrowtable"`` returns the result of ``to_arrowTable`` (both
            query with the ``"Arrow"`` format first); any other value is
            passed straight to ``_chdb.query``.
        **kwargs: Forwarded to ``_chdb.query``.

    Returns:
        The converted object for the two special formats, otherwise whatever
        ``_chdb.query`` returns.
    """
    lower_output_format = output_format.lower()
    if lower_output_format == "dataframe":
        return to_df(_chdb.query(sql, "Arrow", **kwargs))
    if lower_output_format == "arrowtable":
        return to_arrowTable(_chdb.query(sql, "Arrow", **kwargs))
    # Pass the caller's original spelling through; only the comparison is
    # case-insensitive.
    return _chdb.query(sql, output_format, **kwargs)
23 changes: 20 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import sys
import re
import subprocess
import sysconfig
from setuptools import setup, Extension
Expand Down Expand Up @@ -57,6 +58,21 @@ def get_latest_git_tag(minor_ver_auto=False):
print(e)
raise

# Rewrite the `chdb_version = (X, Y, Z)` tuple in chdb/__init__.py so that it
# matches the version string being packaged.
def fix_version_init(version):
    """Rewrite the ``chdb_version`` tuple in chdb/__init__.py to match ``version``.

    Args:
        version: Dotted version string with exactly three numeric parts,
            e.g. ``"0.5.0"``. Surrounding whitespace is ignored.

    Raises:
        ValueError: If ``version`` is not three dot-separated integers.
            Validation happens before the file is opened, so a bad version
            never leaves a syntactically broken ``__init__.py`` behind.
    """
    matched = re.fullmatch(r"(\d+)\.(\d+)\.(\d+)", version.strip())
    if matched is None:
        raise ValueError(
            f"expected a version of the form 'X.Y.Z' with numeric parts, got {version!r}"
        )
    # int() normalises parts such as "05", which would be an invalid literal
    # if written into the tuple verbatim.
    p1, p2, p3 = (int(part) for part in matched.groups())

    init_file = os.path.join(script_dir, "chdb", "__init__.py")
    # Matches the existing assignment, e.g. `chdb_version = (0, 1, 0)`.
    regPattern = r"chdb_version = \(\d+, \d+, \d+\)"
    with open(init_file, "r+") as f:
        init_content = f.read()
        init_content = re.sub(regPattern, f"chdb_version = ({p1}, {p2}, {p3})", init_content)
        # Overwrite in place, then drop any leftover tail from the old content.
        f.seek(0)
        f.write(init_content)
        f.truncate()


# As of Python 3.6, CCompiler has a `has_flag` method.
# cf http://bugs.python.org/issue26689
Expand Down Expand Up @@ -147,15 +163,16 @@ def build_extensions(self):
extra_objects=[chdb_so],
),
]

# fix the version in chdb/__init__.py
versionStr = get_latest_git_tag()
fix_version_init(versionStr)
setup(
packages=['chdb'],
version=get_latest_git_tag(),
version=versionStr,
package_data={'chdb': [chdb_so]},
exclude_package_data={'': ['*.pyc', 'src/**']},
ext_modules=ext_modules,
python_requires='>=3.7',
install_requires=['pyarrow', 'pandas'],
cmdclass={'build_ext': BuildExt},
test_suite="tests",
zip_safe=False,
Expand Down