Skip to content

Commit 5fc4540

Browse files
committed
feat: include duckdb in python bindings
1 parent 416c648 commit 5fc4540

File tree

6 files changed

+55
-13
lines changed

6 files changed

+55
-13
lines changed

python/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ name = "stacrs"
1313
crate-type = ["cdylib"]
1414

1515
[dependencies]
16+
duckdb = { workspace = true, features = [
17+
"bundled",
18+
] } # we don't use it directly, but we need to ensure it's bundled
1619
geojson = { workspace = true }
1720
pyo3 = { workspace = true, features = ["extension-module"] }
1821
pythonize = { workspace = true }
@@ -25,4 +28,5 @@ stac = { workspace = true, features = [
2528
"validate-blocking",
2629
] }
2730
stac-api = { workspace = true, features = ["client"] }
31+
stac-duckdb = { workspace = true }
2832
tokio = { workspace = true, features = ["rt"] }

python/data/extended-item.parquet

34.2 KB
Binary file not shown.

python/src/error.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ impl From<stac_api::Error> for Error {
1616
}
1717
}
1818

19+
impl From<stac_duckdb::Error> for Error {
20+
fn from(value: stac_duckdb::Error) -> Self {
21+
Error(value.to_string())
22+
}
23+
}
24+
1925
impl From<geojson::Error> for Error {
2026
fn from(value: geojson::Error) -> Self {
2127
Error(value.to_string())

python/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ mod search;
77
mod validate;
88
mod write;
99

10+
use duckdb as _;
1011
use error::Error;
1112
use pyo3::prelude::*;
1213

python/src/search.rs

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use pyo3::{
66
use serde::de::DeserializeOwned;
77
use stac::Format;
88
use stac_api::{BlockingClient, Fields, Item, ItemCollection, Items, Search};
9+
use stac_duckdb::Client;
910
use std::str::FromStr;
1011
use tokio::runtime::Builder;
1112

@@ -40,6 +41,9 @@ use tokio::runtime::Builder;
4041
/// will be interpreted as cql2-text, dictionaries as cql2-json.
4142
/// query (dict[str, Any] | None): Additional filtering based on properties.
4243
/// It is recommended to use filter instead, if possible.
44+
/// use_duckdb (bool | None): Query with DuckDB. If None and the href has a
45+
/// 'parquet' or 'geoparquet' extension, will be set to True. Defaults
46+
/// to None.
4347
///
4448
/// Returns:
4549
/// list[dict[str, Any]]: A list of the returned STAC items.
@@ -53,7 +57,7 @@ use tokio::runtime::Builder;
5357
/// ... max_items=1,
5458
/// ... )
5559
#[pyfunction]
56-
#[pyo3(signature = (href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None))]
60+
#[pyo3(signature = (href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, use_duckdb=None))]
5761
pub fn search<'py>(
5862
py: Python<'py>,
5963
href: String,
@@ -69,6 +73,7 @@ pub fn search<'py>(
6973
sortby: Option<StringOrList>,
7074
filter: Option<StringOrDict>,
7175
query: Option<Py<PyDict>>,
76+
use_duckdb: Option<bool>,
7277
) -> PyResult<Bound<'py, PyList>> {
7378
let items = search_items(
7479
href,
@@ -84,6 +89,7 @@ pub fn search<'py>(
8489
sortby,
8590
filter,
8691
query,
92+
use_duckdb,
8793
)?;
8894
pythonize::pythonize(py, &items)
8995
.map_err(PyErr::from)
@@ -126,6 +132,9 @@ pub fn search<'py>(
126132
/// format (str | None): The output format. If none, will be inferred from
127133
/// the outfile extension, and if that fails will fall back to compact JSON.
128134
/// options (list[tuple[str, str]] | None): Configuration values to pass to the object store backend.
135+
/// use_duckdb (bool | None): Query with DuckDB. If None and the href has a
136+
/// 'parquet' or 'geoparquet' extension, will be set to True. Defaults
137+
/// to None.
129138
///
130139
/// Returns:
131140
/// int: The number of items written to the outfile.
@@ -139,7 +148,7 @@ pub fn search<'py>(
139148
/// ... max_items=1,
140149
/// ... )
141150
#[pyfunction]
142-
#[pyo3(signature = (outfile, href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, format=None, options=None))]
151+
#[pyo3(signature = (outfile, href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, format=None, options=None, use_duckdb=None))]
143152
pub fn search_to(
144153
outfile: String,
145154
href: String,
@@ -157,6 +166,7 @@ pub fn search_to(
157166
query: Option<Py<PyDict>>,
158167
format: Option<String>,
159168
options: Option<Vec<(String, String)>>,
169+
use_duckdb: Option<bool>,
160170
) -> PyResult<usize> {
161171
let items = search_items(
162172
href,
@@ -172,6 +182,7 @@ pub fn search_to(
172182
sortby,
173183
filter,
174184
query,
185+
use_duckdb,
175186
)?;
176187
let format = format
177188
.map(|s| s.parse())
@@ -206,8 +217,8 @@ fn search_items(
206217
sortby: Option<StringOrList>,
207218
filter: Option<StringOrDict>,
208219
query: Option<Py<PyDict>>,
220+
use_duckdb: Option<bool>,
209221
) -> PyResult<Vec<Item>> {
210-
let client = BlockingClient::new(&href).map_err(Error::from)?;
211222
let mut fields = Fields::default();
212223
if let Some(include) = include {
213224
fields.include = include.into();
@@ -225,7 +236,7 @@ fn search_items(
225236
.map(|q| pythonize::depythonize(&q.into_bound(py)))
226237
.transpose()
227238
})?;
228-
let search = Search {
239+
let mut search = Search {
229240
intersects: intersects.map(|i| i.into()).transpose()?,
230241
ids: ids.map(|ids| ids.into()),
231242
collections: collections.map(|c| c.into()),
@@ -243,18 +254,33 @@ fn search_items(
243254
..Default::default()
244255
},
245256
};
246-
let items = client.search(search).map_err(Error::from)?;
247-
if let Some(max_items) = max_items {
248-
items
249-
.take(max_items)
250-
.collect::<Result<_, _>>()
257+
if use_duckdb
258+
.unwrap_or_else(|| matches!(Format::infer_from_href(&href), Some(Format::Geoparquet(_))))
259+
{
260+
if let Some(max_items) = max_items {
261+
search.items.limit = Some(max_items.try_into()?);
262+
}
263+
let client = Client::from_href(href).map_err(Error::from)?;
264+
client
265+
.search_to_json(search)
266+
.map(|item_collection| item_collection.items)
251267
.map_err(Error::from)
252268
.map_err(PyErr::from)
253269
} else {
254-
items
255-
.collect::<Result<_, _>>()
256-
.map_err(Error::from)
257-
.map_err(PyErr::from)
270+
let client = BlockingClient::new(&href).map_err(Error::from)?;
271+
let items = client.search(search).map_err(Error::from)?;
272+
if let Some(max_items) = max_items {
273+
items
274+
.take(max_items)
275+
.collect::<Result<_, _>>()
276+
.map_err(Error::from)
277+
.map_err(PyErr::from)
278+
} else {
279+
items
280+
.collect::<Result<_, _>>()
281+
.map_err(Error::from)
282+
.map_err(PyErr::from)
283+
}
258284
}
259285
}
260286

python/tests/test_search.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,8 @@ def test_search_to_geoparquet(tmp_path: Path) -> None:
4444
table = pyarrow.parquet.read_table(tmp_path / "out.parquet")
4545
items = list(stac_geoparquet.arrow.stac_table_to_items(table))
4646
assert len(items) == 1
47+
48+
49+
def test_search_geoparquet(data: Path) -> None:
50+
items = stacrs.search(str(data / "extended-item.parquet"))
51+
assert len(items) == 1

0 commit comments

Comments
 (0)