Skip to content

Added Dataframe output format #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion chdb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
import os
import pyarrow as pa

chdb_version = (0, 1, 0)
if sys.version_info[:2] >= (3, 7):
Expand All @@ -22,7 +23,19 @@
except: # pragma: no cover
__version__ = "unknown"

# wrap _chdb functions

def _to_arrowTable(res):
    """Build a pyarrow table from the memory view exposed by ``res``."""
    # The reader consumes the buffer returned by get_memview(); read_all()
    # then gathers everything it contains into one table.
    reader = pa.RecordBatchFileReader(res.get_memview())
    return reader.read_all()

def to_df(r):
    """Convert a query result into a pandas DataFrame.

    The result ``r`` is first read into an Arrow table by
    ``_to_arrowTable`` and that table is then converted with
    ``to_pandas(use_threads=True)``.
    """
    t = _to_arrowTable(r)
    return t.to_pandas(use_threads=True)

# wrap _chdb functions
def query(sql, output_format="CSV", **kwargs):
    """Run ``sql`` through ``_chdb.query`` and return its result.

    When ``output_format`` equals "dataframe" (compared case-insensitively)
    the query is executed with the "Arrow" format and the result is passed
    to ``to_df``. Any other format is forwarded to ``_chdb.query`` unchanged,
    together with ``kwargs``.
    """
    wants_dataframe = output_format.lower() == "dataframe"
    if not wants_dataframe:
        return _chdb.query(sql, output_format, **kwargs)
    arrow_result = _chdb.query(sql, "Arrow", **kwargs)
    return to_df(arrow_result)
31 changes: 31 additions & 0 deletions chdb/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import sys
import argparse
from .__init__ import query

def main():
    """Command line entry point: run one SQL statement and print the result.

    Positional arguments are the SQL text and an optional output format
    (default "CSV"). The result is printed without a trailing newline.
    """
    parser = argparse.ArgumentParser(
        prog='python -m chdb',
        description='''A simple command line interface for chdb
to run SQL and output in specified format''',
    )
    parser.add_argument(
        'sql',
        nargs=1,
        type=str,
        help='sql, e.g: select 1112222222,555',
    )
    parser.add_argument(
        'format',
        nargs='?',
        type=str,
        default="CSV",
        help='''sql result output format,
e.g: CSV, Dataframe, JSON etc,
more format checkout on
https://clickhouse.com/docs/en/interfaces/formats''',
    )
    args = parser.parse_args()

    # nargs=1 yields a one-element list, so unpack the single SQL string.
    (sql,) = args.sql
    output_format = args.format
    res = query(sql, output_format)

    # The "dataframe" result is printed as-is; any other result is printed
    # through its data() accessor.
    printable = res if output_format.lower() == 'dataframe' else res.data()
    print(printable, end="")


if __name__ == '__main__':
    main()
2 changes: 2 additions & 0 deletions chdb/test_smoke.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ python3 -c \
python3 -c \
"import chdb; res = chdb.query('select version()', 'CSV'); print(str(res.get_memview().tobytes()))"

# test cli
python3 -m chdb "select 1112222222,555" Dataframe
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def build_extensions(self):
exclude_package_data={'': ['*.pyc', 'src/**']},
ext_modules=ext_modules,
python_requires='>=3.7',
install_requires=['pyarrow', 'pandas'],
cmdclass={'build_ext': BuildExt},
test_suite="tests",
zip_safe=False,
Expand Down