Skip to content

Commit 8b27225

Browse files
authored
refactor: duckdb (#711)
I'm removing a lot of the config and extension management, as that can be handled manually in Rust. We'll move a lot of the funkiness up to **rustac-py** so we have a Python interface to do stuff.
1 parent 6c284a1 commit 8b27225

File tree

10 files changed

+757
-817
lines changed

10 files changed

+757
-817
lines changed

crates/core/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1515
### Changed
1616

1717
- Default to snappy compression for geoparquet ([#673](https://github.com/stac-utils/rustac/pull/673))
18+
- Ensure geoparquet->json provides valid datetime strings (UTC) ([#711](https://github.com/stac-utils/rustac/pull/711))
1819

1920
## [0.12.0] - 2025-01-31
2021

crates/core/src/geoarrow/json.rs

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@ const TOP_LEVEL_KEYS: [&str; 10] = [
3838
"collection",
3939
];
4040

41+
use crate::Error;
4142
use arrow_array::RecordBatchReader;
4243
use arrow_array::{cast::*, types::*, *};
4344
use arrow_cast::display::{ArrayFormatter, FormatOptions};
4445
use arrow_json::JsonSerializable;
4546
use arrow_schema::*;
47+
use chrono::DateTime;
4648
use geo_traits::to_geo::{
4749
ToGeoGeometry, ToGeoGeometryCollection, ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPoint,
4850
ToGeoMultiPolygon, ToGeoPoint, ToGeoPolygon, ToGeoRect,
@@ -53,6 +55,8 @@ use geoarrow_array::cast::AsGeoArrowArray;
5355
use serde_json::{Value, json, map::Map as JsonMap};
5456
use std::iter;
5557

58+
use super::DATETIME_COLUMNS;
59+
5660
fn primitive_array_to_json<T>(array: &dyn Array) -> Result<Vec<Value>, ArrowError>
5761
where
5862
T: ArrowPrimitiveType,
@@ -427,7 +431,7 @@ fn set_column_for_json_rows(
427431
/// Creates JSON values from a record batch reader.
428432
pub fn from_record_batch_reader<R: RecordBatchReader>(
429433
reader: R,
430-
) -> Result<Vec<serde_json::Map<String, Value>>, crate::Error> {
434+
) -> Result<Vec<serde_json::Map<String, Value>>, Error> {
431435
use geoarrow_array::GeoArrowType;
432436

433437
let schema = reader.schema();
@@ -489,16 +493,18 @@ pub fn from_record_batch_reader<R: RecordBatchReader>(
489493
"geometry".into(),
490494
serde_json::to_value(geojson::Geometry::new(value))?,
491495
);
492-
items.push(unflatten(row));
496+
items.push(unflatten(row)?);
493497
}
494498
}
495499
} else {
496-
items = json_rows.map(unflatten).collect();
500+
items = json_rows.map(unflatten).collect::<Result<_, Error>>()?;
497501
}
498502
Ok(items)
499503
}
500504

501-
fn unflatten(mut item: serde_json::Map<String, Value>) -> serde_json::Map<String, Value> {
505+
fn unflatten(
506+
mut item: serde_json::Map<String, Value>,
507+
) -> Result<serde_json::Map<String, Value>, Error> {
502508
let mut properties = serde_json::Map::new();
503509
let keys: Vec<_> = item
504510
.keys()
@@ -512,13 +518,25 @@ fn unflatten(mut item: serde_json::Map<String, Value>) -> serde_json::Map<String
512518
.collect();
513519
for key in keys {
514520
if let Some(value) = item.remove(&key) {
515-
let _ = properties.insert(key, value);
521+
if DATETIME_COLUMNS.contains(&key.as_str()) {
522+
if let Some(value) = value.as_str() {
523+
let _ = properties.insert(
524+
key,
525+
DateTime::parse_from_rfc3339(value)?
526+
.to_utc()
527+
.to_rfc3339()
528+
.into(),
529+
);
530+
}
531+
} else {
532+
let _ = properties.insert(key, value);
533+
}
516534
}
517535
}
518536
if !properties.is_empty() {
519537
let _ = item.insert("properties".to_string(), Value::Object(properties));
520538
}
521-
item
539+
Ok(item)
522540
}
523541

524542
fn record_batches_to_json_rows(

crates/core/src/geoarrow/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ pub const VERSION_KEY: &str = "stac_geoparquet:version";
2222
/// The stac-geoparquet version.
2323
pub const VERSION: &str = "1.0.0";
2424

25-
const DATETIME_COLUMNS: [&str; 8] = [
25+
/// Names of the geoarrow columns that hold datetime values.
26+
pub const DATETIME_COLUMNS: [&str; 8] = [
2627
"datetime",
2728
"start_datetime",
2829
"end_datetime",

crates/duckdb/CHANGELOG.md

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
88

99
### Added
1010

11-
- Configure s3 credential chain ([#621](https://github.com/stac-utils/rustac/pull/621))
12-
- Read hive partitioned datasets, `Config` structure ([#624](https://github.com/stac-utils/rustac/pull/624))
13-
- `Client.search_to_arrow_table` ([#634](https://github.com/stac-utils/rustac/pull/634))
11+
- Read hive partitioned datasets ([#624](https://github.com/stac-utils/rustac/pull/624))
1412
- Conditionally disable parsing the WKB ([#635](https://github.com/stac-utils/rustac/pull/635))
1513
- `Client.extensions` ([#665](https://github.com/stac-utils/rustac/pull/665))
16-
- `Config.install_extensions` ([#681](https://github.com/stac-utils/rustac/pull/681))
17-
- `Config.from_href` ([#684](https://github.com/stac-utils/rustac/pull/684))
1814
- Filtering ([#699](https://github.com/stac-utils/rustac/pull/699))
1915

2016
### Removed

0 commit comments

Comments
 (0)