diff --git a/examples/to_pandas.py b/examples/to_pandas.py
new file mode 100644
index 00000000000..f74c244b761
--- /dev/null
+++ b/examples/to_pandas.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+"""Example: query a Parquet file with chdb and view the result via pyarrow and pandas."""
+import os
+
+import pyarrow as pa
+import chdb
+
+
+def to_arrowTable(res):
+    """Read the Arrow-format bytes of a chdb result into a pyarrow Table."""
+    return pa.RecordBatchFileReader(res.get_memview()).read_all()
+
+
+def to_df(res):
+    """Convert an Arrow-format chdb result into a pandas DataFrame."""
+    return to_arrowTable(res).to_pandas(use_threads=True)
+
+
+# Locate the sample Parquet file relative to this script rather than the CWD.
+current_dir = os.path.dirname(os.path.abspath(__file__))
+test_parquet = os.path.join(
+    current_dir,
+    "../contrib/arrow/cpp/submodules/parquet-testing/data/alltypes_dictionary.parquet",
+)
+
+# Run SQL on the Parquet file and ask chdb for Arrow-format output.
+res = chdb.query(f"select * from file('{test_parquet}', Parquet)", "Arrow")
+print("\nresult from chdb:")
+print(res.get_memview().tobytes())
+
+print("\nresult from chdb to pyarrow:")
+print(to_arrowTable(res))
+
+print("\nresult from chdb to pandas:")
+print(to_df(res))
diff --git a/pybind/libtest.py b/pybind/libtest.py
deleted file mode 100644
index 0a90a87b6fa..00000000000
--- a/pybind/libtest.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import pyarrow as pa
-import example
-
-# 获取 arrow::Table 对象
-table = example.queryToArrow("SELECT * FROM file('/home/Clickhouse/bench/result.parquet', Parquet) LIMIT 10")
-
-# 使用 pyarrow.lib.Table.from_batches 方法将其转换为 pyarrow.lib.Table
-table = pa.lib.Table.from_batches([table])
\ No newline at end of file
diff --git a/pybind/readarrow.py b/pybind/readarrow.py
deleted file mode 100644
index f44e39f1bec..00000000000
--- a/pybind/readarrow.py
+++ /dev/null
@@ -1,5 +0,0 @@
-import pyarrow as pa
-f1 = pa.OSFile("/home/Clickhouse/bench/result.arrow")
-af = pa.ipc.open_file(f1, 23922).read_all()
-print(type(af)) # pyarrow.lib.Table
-print(af)