Skip to content

Commit 028329b

Browse files
authored
Merge pull request #13 from auxten/hotfix-pyarrow-dep
Try import pyarrow and pandas when necessary
2 parents a929964 + cd14bf7 commit 028329b

File tree

3 files changed

+52
-17
lines changed

3 files changed

+52
-17
lines changed

.github/workflows/build_wheels.yml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
name: Build
22

3-
on: [push, pull_request]
3+
on:
4+
push:
5+
tags:
6+
- 'v*'
7+
pull_request:
8+
branches:
9+
- pybind
410

511
jobs:
612
build_wheels_linux:
@@ -82,6 +88,7 @@ jobs:
8288
export CC=/usr/bin/clang
8389
export CXX=/usr/bin/clang++
8490
bash ./chdb/build.sh
91+
python3 -m pip install pandas pyarrow
8592
bash -x ./chdb/test_smoke.sh
8693
continue-on-error: false
8794
- name: Check ccache statistics
@@ -148,7 +155,6 @@ jobs:
148155
run: |
149156
pwd
150157
uname -a
151-
export HOMEBREW_NO_AUTO_UPDATE=1
152158
export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
153159
brew install git ccache ninja libtool gettext llvm@15 gcc binutils grep findutils zstd
154160
export PATH=$(brew --prefix llvm@15)/bin:$PATH
@@ -193,12 +199,13 @@ jobs:
193199
export CXX=$(brew --prefix llvm@15)/bin/clang++
194200
bash gen_manifest.sh
195201
bash ./chdb/build.sh
202+
python3 -m pip install pandas pyarrow
196203
bash -x ./chdb/test_smoke.sh
197204
continue-on-error: false
198205
- name: Keep killall ccache and wait for ccache to finish
199206
if: always()
200207
run: |
201-
sleep 300
208+
sleep 60
202209
while ps -ef | grep ccache | grep -v grep; do \
203210
killall ccache; \
204211
sleep 10; \
@@ -261,7 +268,6 @@ jobs:
261268
run: |
262269
pwd
263270
uname -a
264-
export HOMEBREW_NO_AUTO_UPDATE=1
265271
export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
266272
brew install git ccache ninja libtool gettext llvm@15 gcc binutils grep findutils zstd
267273
export PATH=$(brew --prefix llvm@15)/bin:$PATH
@@ -349,7 +355,7 @@ jobs:
349355
- name: Keep killall ccache and wait for ccache to finish
350356
if: always()
351357
run: |
352-
sleep 300
358+
sleep 60
353359
while ps -ef | grep ccache | grep -v grep; do \
354360
killall ccache; \
355361
sleep 10; \

chdb/__init__.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
import sys
22
import os
3-
import pyarrow as pa
43

5-
chdb_version = (0, 1, 0)
4+
chdb_version = (0, 5, 0)
65
if sys.version_info[:2] >= (3, 7):
76
# get the path of the current file
87
current_path = os.path.dirname(os.path.abspath(__file__))
@@ -23,19 +22,32 @@
2322
except: # pragma: no cover
2423
__version__ = "unknown"
2524

26-
27-
def _to_arrowTable(res):
25+
# return pyarrow table
26+
def to_arrowTable(res):
2827
"""convert res to arrow table"""
28+
# Import pyarrow and pandas lazily; if either is missing, raise ImportError with an install hint
29+
try:
30+
import pyarrow as pa
31+
import pandas
32+
except ImportError as e:
33+
print(f'ImportError: {e}')
34+
print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
35+
raise ImportError('Failed to import pyarrow or pandas') from None
36+
2937
return pa.RecordBatchFileReader(res.get_memview()).read_all()
3038

39+
# return pandas dataframe
3140
def to_df(r):
3241
"""Convert a query result to a pandas DataFrame (via an Arrow table)."""
33-
t = _to_arrowTable(r)
42+
t = to_arrowTable(r)
3443
return t.to_pandas(use_threads=True)
3544

3645
# wrap _chdb functions
3746
def query(sql, output_format="CSV", **kwargs):
38-
if output_format.lower() == "dataframe":
39-
r = _chdb.query(sql, "Arrow", **kwargs)
40-
return to_df(r)
41-
return _chdb.query(sql, output_format, **kwargs)
47+
lower_output_format = output_format.lower()
48+
if lower_output_format == "dataframe":
49+
return to_df(_chdb.query(sql, "Arrow", **kwargs))
50+
elif lower_output_format == 'arrowtable':
51+
return to_arrowTable(_chdb.query(sql, "Arrow", **kwargs))
52+
else:
53+
return _chdb.query(sql, output_format, **kwargs)

setup.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import sys
3+
import re
34
import subprocess
45
import sysconfig
56
from setuptools import setup, Extension
@@ -57,6 +58,21 @@ def get_latest_git_tag(minor_ver_auto=False):
5758
print(e)
5859
raise
5960

61+
# Rewrite the `chdb_version = (X, Y, Z)` tuple in chdb/__init__.py so it matches the given version string
62+
# The tuple is found by regex and replaced with the parts of `version`, which must have exactly three dot-separated parts
63+
def fix_version_init(version):
64+
# split version string into parts
65+
p1, p2, p3 = version.split('.')
66+
init_file = os.path.join(script_dir, "chdb", "__init__.py")
67+
with open(init_file, "r+") as f:
68+
init_content = f.read()
69+
# regex replace the version string `chdb_version = (0, 1, 0)`
70+
regPattern = r"chdb_version = \(\d+, \d+, \d+\)"
71+
init_content = re.sub(regPattern, f"chdb_version = ({p1}, {p2}, {p3})", init_content)
72+
f.seek(0)
73+
f.write(init_content)
74+
f.truncate()
75+
6076

6177
# As of Python 3.6, CCompiler has a `has_flag` method.
6278
# cf http://bugs.python.org/issue26689
@@ -147,15 +163,16 @@ def build_extensions(self):
147163
extra_objects=[chdb_so],
148164
),
149165
]
150-
166+
# fix the version in chdb/__init__.py
167+
versionStr = get_latest_git_tag()
168+
fix_version_init(versionStr)
151169
setup(
152170
packages=['chdb'],
153-
version=get_latest_git_tag(),
171+
version=versionStr,
154172
package_data={'chdb': [chdb_so]},
155173
exclude_package_data={'': ['*.pyc', 'src/**']},
156174
ext_modules=ext_modules,
157175
python_requires='>=3.7',
158-
install_requires=['pyarrow', 'pandas'],
159176
cmdclass={'build_ext': BuildExt},
160177
test_suite="tests",
161178
zip_safe=False,

0 commit comments

Comments
 (0)