Skip to content

Commit 4411d3b

Browse files
destrex271 and JelteF
authored
Support for STRUCT type (#669)
This introduces a `duckdb.struct` type to postgres which can be used to read the `STRUCT` type in DuckDB. Fixes #599 --------- Co-authored-by: Jelte Fennema-Nio <jelte@motherduck.com>
1 parent 57b59b4 commit 4411d3b

File tree

6 files changed

+152
-21
lines changed

6 files changed

+152
-21
lines changed

include/pgduckdb/pgduckdb_metadata_cache.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ uint64_t CacheVersion();
99
Oid ExtensionOid();
1010
Oid SchemaOid();
1111
Oid DuckdbRowOid();
12+
Oid DuckdbStructOid();
1213
Oid DuckdbUnresolvedTypeOid();
1314
Oid DuckdbUnionOid();
1415
Oid DuckdbMapOid();

sql/pg_duckdb--0.3.0--0.4.0.sql

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,53 @@ DROP FUNCTION duckdb.cache_info;
66
DROP FUNCTION duckdb.cache;
77
DROP TYPE duckdb.cache_info;
88

9+
-- New Data type to handle duckdb struct
10+
CREATE TYPE duckdb.struct;
11+
CREATE FUNCTION duckdb.struct_in(cstring) RETURNS duckdb.struct AS 'MODULE_PATHNAME', 'duckdb_struct_in' LANGUAGE C IMMUTABLE STRICT;
12+
CREATE FUNCTION duckdb.struct_out(duckdb.struct) RETURNS cstring AS 'MODULE_PATHNAME', 'duckdb_struct_out' LANGUAGE C IMMUTABLE STRICT;
13+
CREATE FUNCTION duckdb.struct_subscript(internal) RETURNS internal AS 'MODULE_PATHNAME', 'duckdb_struct_subscript' LANGUAGE C IMMUTABLE STRICT;
14+
CREATE TYPE duckdb.struct (
15+
INTERNALLENGTH = VARIABLE,
16+
INPUT = duckdb.struct_in,
17+
OUTPUT = duckdb.struct_out,
18+
SUBSCRIPT = duckdb.struct_subscript
19+
);
20+
21+
-- Update JSON functions that return STRUCT to actually return the struct type.
22+
-- To do so we need to drop + create them.
23+
DROP FUNCTION @extschema@.json_transform("json" duckdb.json, structure duckdb.json);
24+
DROP FUNCTION @extschema@.from_json("json" duckdb.json, structure duckdb.json);
25+
DROP FUNCTION @extschema@.json_transform_strict("json" duckdb.json, structure duckdb.json);
26+
DROP FUNCTION @extschema@.from_json_strict("json" duckdb.json, structure duckdb.json);
27+
28+
-- json_transform
29+
CREATE FUNCTION @extschema@.json_transform("json" duckdb.json, structure duckdb.json)
30+
RETURNS duckdb.struct
31+
SET search_path = pg_catalog, pg_temp
32+
AS 'MODULE_PATHNAME', 'duckdb_only_function'
33+
LANGUAGE C;
34+
35+
-- from_json
36+
CREATE FUNCTION @extschema@.from_json("json" duckdb.json, structure duckdb.json)
37+
RETURNS duckdb.struct
38+
SET search_path = pg_catalog, pg_temp
39+
AS 'MODULE_PATHNAME', 'duckdb_only_function'
40+
LANGUAGE C;
41+
42+
-- json_transform_strict
43+
CREATE FUNCTION @extschema@.json_transform_strict("json" duckdb.json, structure duckdb.json)
44+
RETURNS duckdb.struct
45+
SET search_path = pg_catalog, pg_temp
46+
AS 'MODULE_PATHNAME', 'duckdb_only_function'
47+
LANGUAGE C;
48+
49+
-- from_json_strict
50+
CREATE FUNCTION @extschema@.from_json_strict("json" duckdb.json, structure duckdb.json)
51+
RETURNS duckdb.struct
52+
SET search_path = pg_catalog, pg_temp
53+
AS 'MODULE_PATHNAME', 'duckdb_only_function'
54+
LANGUAGE C;
55+
956
DROP FUNCTION duckdb.install_extension(TEXT);
1057
CREATE FUNCTION duckdb.install_extension(extension_name TEXT, source TEXT DEFAULT 'core') RETURNS void
1158
SET search_path = pg_catalog, pg_temp
@@ -521,4 +568,4 @@ CREATE TYPE duckdb.map(
521568
INTERNALLENGTH = VARIABLE,
522569
INPUT = duckdb.map_in,
523570
OUTPUT = duckdb.map_out
524-
);
571+
);

src/pgduckdb_metadata_cache.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ struct {
6666
Oid schema_oid;
6767
/* The OID of the duckdb.row type */
6868
Oid row_oid;
69+
/* The OID of the duckdb.struct type */
70+
Oid struct_oid;
6971
/* The OID of the duckdb.unresolved_type */
7072
Oid unresolved_type_oid;
7173
/* The OID of the duckdb.union type */
@@ -247,6 +249,7 @@ IsExtensionRegistered() {
247249

248250
cache.schema_oid = get_namespace_oid("duckdb", false);
249251
cache.row_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("row"), cache.schema_oid);
252+
cache.struct_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("struct"), cache.schema_oid);
250253
cache.unresolved_type_oid =
251254
GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("unresolved_type"), cache.schema_oid);
252255

@@ -316,6 +319,12 @@ DuckdbRowOid() {
316319
return cache.row_oid;
317320
}
318321

322+
Oid
323+
DuckdbStructOid() {
324+
Assert(cache.valid);
325+
return cache.struct_oid;
326+
}
327+
319328
Oid
320329
DuckdbUnresolvedTypeOid() {
321330
Assert(cache.valid);

src/pgduckdb_options.cpp

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -383,40 +383,51 @@ DECLARE_PG_FUNCTION(pgduckdb_recycle_ddb) {
383383
}
384384

385385
Node *
386-
CoerceRowSubscriptToText(struct ParseState *pstate, A_Indices *subscript) {
386+
CoerceSubscriptToText(struct ParseState *pstate, A_Indices *subscript, const char *type_name) {
387387
if (!subscript->uidx) {
388-
elog(ERROR, "Creating a slice out of duckdb.row is not supported");
388+
elog(ERROR, "Creating a slice out of %s is not supported", type_name);
389389
}
390390

391391
Node *subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind);
392392
int expr_location = exprLocation(subscript->uidx);
393393
Oid subscript_expr_type = exprType(subscript_expr);
394394

395395
if (subscript->lidx) {
396-
elog(ERROR, "Creating a slice out of duckdb.row is not supported");
396+
elog(ERROR, "Creating a slice out of %s is not supported", type_name);
397397
}
398398

399399
Node *coerced_expr = coerce_to_target_type(pstate, subscript_expr, subscript_expr_type, TEXTOID, -1,
400400
COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, expr_location);
401401
if (!coerced_expr) {
402-
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must have text type"),
402+
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("%s subscript must have text type", type_name),
403403
parser_errposition(pstate, expr_location)));
404404
}
405405

406406
if (!IsA(subscript_expr, Const)) {
407-
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must be a constant"),
407+
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("%s subscript must be a constant", type_name),
408408
parser_errposition(pstate, expr_location)));
409409
}
410410

411411
Const *subscript_const = castNode(Const, subscript_expr);
412412
if (subscript_const->constisnull) {
413-
ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("duckdb.row subscript cannot be NULL"),
413+
ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("%s subscript cannot be NULL", type_name),
414414
parser_errposition(pstate, expr_location)));
415415
}
416416

417417
return coerced_expr;
418418
}
419419

420+
Node *
421+
CoerceRowSubscriptToText(struct ParseState *pstate, A_Indices *subscript) {
422+
return CoerceSubscriptToText(pstate, subscript, "duckdb.row");
423+
}
424+
425+
// duckdb.struct counterpart of CoerceRowSubscriptToText; both delegate to CoerceSubscriptToText
426+
Node *
427+
CoerceStructSubscriptToText(struct ParseState *pstate, A_Indices *subscript) {
428+
return CoerceSubscriptToText(pstate, subscript, "duckdb.struct");
429+
}
430+
420431
/*
421432
* In Postgres all index operations in a row are all slices or all plain
422433
* index operations. If you mix them, all are converted to slices.
@@ -456,29 +467,29 @@ AddSubscriptExpressions(SubscriptingRef *sbsref, struct ParseState *pstate, A_In
456467
}
457468

458469
/*
459-
* DuckdbRowSubscriptTransform is called by the parser when a subscripting
470+
* DuckdbSubscriptTransform is called by the parser when a subscripting
460471
* operation is performed on a duckdb.row or duckdb.struct. It has two main purposes:
461472
* 1. Ensure that the row is being indexed using a string literal
462473
* 2. Ensure that the return type of this index operation is duckdb.unresolved_type
463474
*/
464475
void
465-
DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
466-
bool isAssignment) {
476+
DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
477+
bool isAssignment, const char *type_name) {
467478
/*
468479
* We need to populate our cache for some of the code below. Normally this
469480
* cache is populated at the start of our planner hook, but this function
470481
* is being called from the parser.
471482
*/
472483
if (!pgduckdb::IsExtensionRegistered()) {
473-
elog(ERROR, "BUG: Using duckdb.row but the pg_duckdb extension is not installed");
484+
elog(ERROR, "BUG: Using %s but the pg_duckdb extension is not installed", type_name);
474485
}
475486

476487
if (isAssignment) {
477-
elog(ERROR, "Assignment to duckdb.row is not supported");
488+
elog(ERROR, "Assignment to %s is not supported", type_name);
478489
}
479490

480491
if (indirection == NIL) {
481-
elog(ERROR, "Subscripting duckdb.row with an empty subscript is not supported");
492+
elog(ERROR, "Subscripting %s with an empty subscript is not supported", type_name);
482493
}
483494

484495
bool first = true;
@@ -505,6 +516,18 @@ DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct P
505516
sbsref->reftypmod = -1;
506517
}
507518

519+
void
520+
DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
521+
bool isAssignment) {
522+
DuckdbSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.row");
523+
}
524+
525+
void
526+
DuckdbStructSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
527+
bool isAssignment) {
528+
DuckdbSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.struct");
529+
}
530+
508531
/*
509532
* DuckdbRowSubscriptExecSetup is called by the executor when a subscripting
510533
* operation is performed on a duckdb.row. This should never happen, because
@@ -529,6 +552,24 @@ DECLARE_PG_FUNCTION(duckdb_row_subscript) {
529552
PG_RETURN_POINTER(&duckdb_row_subscript_routines);
530553
}
531554

555+
void
556+
DuckdbStructSubscriptExecSetup(const SubscriptingRef * /*sbsref*/, SubscriptingRefState * /*sbsrefstate*/,
557+
SubscriptExecSteps * /*exprstate*/) {
558+
elog(ERROR, "Subscripting duckdb.struct is not supported in the Postgres Executor");
559+
}
560+
561+
static SubscriptRoutines duckdb_struct_subscript_routines = {
562+
.transform = DuckdbStructSubscriptTransform,
563+
.exec_setup = DuckdbStructSubscriptExecSetup,
564+
.fetch_strict = false,
565+
.fetch_leakproof = true,
566+
.store_leakproof = true,
567+
};
568+
569+
DECLARE_PG_FUNCTION(duckdb_struct_subscript) {
570+
PG_RETURN_POINTER(&duckdb_struct_subscript_routines);
571+
}
572+
532573
/*
533574
* DuckdbUnresolvedTypeSubscriptTransform is called by the parser when a
534575
* subscripting operation is performed on a duckdb.unresolved_type. All this
@@ -597,6 +638,14 @@ DECLARE_PG_FUNCTION(duckdb_row_out) {
597638
elog(ERROR, "Converting a duckdb.row to a string is not supported");
598639
}
599640

641+
DECLARE_PG_FUNCTION(duckdb_struct_in) {
642+
elog(ERROR, "Creating the duckdb.struct type is not supported");
643+
}
644+
645+
DECLARE_PG_FUNCTION(duckdb_struct_out) {
646+
return textout(fcinfo);
647+
}
648+
600649
DECLARE_PG_FUNCTION(duckdb_unresolved_type_in) {
601650
return textin(fcinfo);
602651
}

src/pgduckdb_types.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "pgduckdb/pgduckdb_types.hpp"
88
#include "pgduckdb/pgduckdb_metadata_cache.hpp"
99
#include "pgduckdb/pgduckdb_utils.hpp"
10+
#include "pgduckdb/pgduckdb_metadata_cache.hpp"
1011
#include "pgduckdb/scan/postgres_scan.hpp"
1112
#include "pgduckdb/pg/types.hpp"
1213

@@ -493,6 +494,12 @@ ConvertUUIDDatum(const duckdb::Value &value) {
493494
return UUIDPGetDatum(postgres_uuid);
494495
}
495496

497+
inline Datum
498+
ConvertDuckStructDatum(const duckdb::Value &value) {
499+
D_ASSERT(value.type().id() == duckdb::LogicalTypeId::STRUCT);
500+
return ConvertToStringDatum(value);
501+
}
502+
496503
static Datum
497504
ConvertUnionDatum(const duckdb::Value &value) {
498505
D_ASSERT(value.type().id() == duckdb::LogicalTypeId::UNION);
@@ -1109,8 +1116,14 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
11091116
ConvertDuckToPostgresArray<ByteArray>(slot, value, col);
11101117
break;
11111118
}
1112-
default:
1113-
if (oid == pgduckdb::DuckdbUnionOid()) {
1119+
default: {
1120+
// Since the oids of the following types are calculated at runtime, it is not
1121+
// possible to compile the code while placing it as a separate case
1122+
// in the switch-case clause above.
1123+
if (oid == pgduckdb::DuckdbStructOid()) {
1124+
slot->tts_values[col] = ConvertDuckStructDatum(value);
1125+
return true;
1126+
} else if (oid == pgduckdb::DuckdbUnionOid()) {
11141127
slot->tts_values[col] = ConvertUnionDatum(value);
11151128
return true;
11161129
} else if (oid == pgduckdb::DuckdbMapOid()) {
@@ -1120,6 +1133,7 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
11201133
elog(WARNING, "(PGDuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid);
11211134
return false;
11221135
}
1136+
}
11231137
return true;
11241138
}
11251139

@@ -1216,6 +1230,8 @@ ConvertPostgresToBaseDuckColumnType(Form_pg_attribute &attribute) {
12161230
default:
12171231
if (typoid == pgduckdb::DuckdbUnionOid()) {
12181232
return duckdb::LogicalTypeId::UNION;
1233+
} else if (typoid == pgduckdb::DuckdbStructOid()) {
1234+
return duckdb::LogicalTypeId::STRUCT;
12191235
}
12201236
return duckdb::LogicalType::USER("UnsupportedPostgresType (Oid=" + std::to_string(attribute->atttypid) + ")");
12211237
}
@@ -1363,6 +1379,8 @@ GetPostgresDuckDBType(const duckdb::LogicalType &type) {
13631379
return UUIDOID;
13641380
case duckdb::LogicalTypeId::VARINT:
13651381
return NUMERICOID;
1382+
case duckdb::LogicalTypeId::STRUCT:
1383+
return pgduckdb::DuckdbStructOid();
13661384
case duckdb::LogicalTypeId::LIST:
13671385
case duckdb::LogicalTypeId::ARRAY: {
13681386
const duckdb::LogicalType *duck_type = &type;

test/regression/expected/json_functions_duckdb.out

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -338,11 +338,19 @@ SELECT public.json_group_structure(j) FROM example2;
338338
-- ('{"family": "canidae", "species": ["labrador", "bulldog"], "hair": true}');
339339
-- -- <JSON_TRANSFORM>
340340
SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') FROM example2;
341-
WARNING: (PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: STRUCT("family" VARCHAR, coolness DOUBLE) to Postgres type
342-
ERROR: (PGDuckDB/CreatePlan) Cache lookup failed for type 0
341+
json_transform
342+
-----------------------------------------
343+
{'family': anatidae, 'coolness': 42.42}
344+
{'family': canidae, 'coolness': NULL}
345+
(2 rows)
346+
343347
SELECT public.json_transform(j, '{"family": "TINYINT", "coolness": "DECIMAL(4, 2)"}') FROM example2;
344-
WARNING: (PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: STRUCT("family" TINYINT, coolness DECIMAL(4,2)) to Postgres type
345-
ERROR: (PGDuckDB/CreatePlan) Cache lookup failed for type 0
348+
json_transform
349+
-------------------------------------
350+
{'family': NULL, 'coolness': 42.42}
351+
{'family': NULL, 'coolness': NULL}
352+
(2 rows)
353+
346354
SELECT res['family'] family, res['coolness'] coolness FROM (
347355
SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') res FROM example2
348356
) q;
@@ -373,8 +381,7 @@ SELECT res['family'] family, res['coolness'] coolness FROM (
373381
-- -- </JSON_TRANSFORM>
374382
-- -- <JSON_TRANSFORM_STRICT>
375383
SELECT public.json_transform_strict(j, '{"family": "TINYINT", "coolness": "DOUBLE"}') FROM example2;
376-
WARNING: (PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: STRUCT("family" TINYINT, coolness DOUBLE) to Postgres type
377-
ERROR: (PGDuckDB/CreatePlan) Cache lookup failed for type 0
384+
ERROR: (PGDuckDB/Duckdb_ExecCustomScan_Cpp) Invalid Input Error: Failed to cast value to numerical: "canidae"
378385
SELECT res['family'] family FROM (
379386
SELECT public.json_transform_strict(j, '{"family": "VARCHAR"}') res FROM example2
380387
) q;

0 commit comments

Comments
 (0)