Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/pgduckdb/pgduckdb_metadata_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ uint64_t CacheVersion();
Oid ExtensionOid();
Oid SchemaOid();
Oid DuckdbRowOid();
Oid DuckdbStructOid();
Oid DuckdbUnresolvedTypeOid();
Oid DuckdbUnionOid();
Oid DuckdbMapOid();
Expand Down
12 changes: 12 additions & 0 deletions sql/pg_duckdb--0.3.0--0.4.0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ DROP FUNCTION duckdb.cache_info;
DROP FUNCTION duckdb.cache;
DROP TYPE duckdb.cache_info;

-- New Data type to handle duckdb struct
CREATE TYPE duckdb.struct;
CREATE FUNCTION duckdb.struct_in(cstring) RETURNS duckdb.struct AS 'MODULE_PATHNAME', 'duckdb_struct_in' LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION duckdb.struct_out(duckdb.struct) RETURNS cstring AS 'MODULE_PATHNAME', 'duckdb_struct_out' LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION duckdb.struct_subscript(internal) RETURNS internal AS 'MODULE_PATHNAME', 'duckdb_struct_subscript' LANGUAGE C IMMUTABLE STRICT;
CREATE TYPE duckdb.struct (
INTERNALLENGTH = VARIABLE,
INPUT = duckdb.struct_in,
OUTPUT = duckdb.struct_out,
SUBSCRIPT = duckdb.struct_subscript
);

DROP FUNCTION duckdb.install_extension(TEXT);
CREATE FUNCTION duckdb.install_extension(extension_name TEXT, source TEXT DEFAULT 'core') RETURNS void
SET search_path = pg_catalog, pg_temp
Expand Down
9 changes: 9 additions & 0 deletions src/pgduckdb_metadata_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ struct {
Oid schema_oid;
/* The OID of the duckdb.row type */
Oid row_oid;
/* The OID of the duckdb.struct type */
Oid struct_oid;
/* The OID of the duckdb.unresolved_type */
Oid unresolved_type_oid;
/* The OID of the duckdb.union type */
Expand Down Expand Up @@ -247,6 +249,7 @@ IsExtensionRegistered() {

cache.schema_oid = get_namespace_oid("duckdb", false);
cache.row_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("row"), cache.schema_oid);
cache.struct_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("struct"), cache.schema_oid);
cache.unresolved_type_oid =
GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("unresolved_type"), cache.schema_oid);

Expand Down Expand Up @@ -316,6 +319,12 @@ DuckdbRowOid() {
return cache.row_oid;
}

Oid
DuckdbStructOid() {
Assert(cache.valid);
return cache.struct_oid;
}

Oid
DuckdbUnresolvedTypeOid() {
Assert(cache.valid);
Expand Down
107 changes: 107 additions & 0 deletions src/pgduckdb_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,42 @@ CoerceRowSubscriptToText(struct ParseState *pstate, A_Indices *subscript) {
return coerced_expr;
}

// Cloned implementation from CoerceRowSubscriptToText
Node *
CoerceStructSubscriptToText(struct ParseState *pstate, A_Indices *subscript) {
if (!subscript->uidx) {
elog(ERROR, "Creating a slice out of duckdb.struct is not supported");
}

Node *subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind);
int expr_location = exprLocation(subscript->uidx);
Oid subscript_expr_type = exprType(subscript_expr);

if (subscript->lidx) {
elog(ERROR, "Creating a slice out of duckdb.struct is not supported");
}

Node *coerced_expr = coerce_to_target_type(pstate, subscript_expr, subscript_expr_type, TEXTOID, -1,
COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, expr_location);
if (!coerced_expr) {
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.struct subscript must have text type"),
parser_errposition(pstate, expr_location)));
}

if (!IsA(subscript_expr, Const)) {
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.struct subscript must be a constant"),
parser_errposition(pstate, expr_location)));
}

Const *subscript_const = castNode(Const, subscript_expr);
if (subscript_const->constisnull) {
ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("duckdb.struct subscript cannot be NULL"),
parser_errposition(pstate, expr_location)));
}

return coerced_expr;
}

/*
* In Postgres all index operations in a row ar all slices or all plain
* index operations. If you mix them, all are converted to slices.
Expand Down Expand Up @@ -529,6 +565,69 @@ DECLARE_PG_FUNCTION(duckdb_row_subscript) {
PG_RETURN_POINTER(&duckdb_row_subscript_routines);
}

// Copied implementation from duckdb.row for duckdb.struct
void
DuckdbStructSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
bool isAssignment) {
/*
* We need to populate our cache for some of the code below. Normally this
* cache is populated at the start of our planner hook, but this function
* is being called from the parser.
*/
if (!pgduckdb::IsExtensionRegistered()) {
elog(ERROR, "BUG: Using duckdb.struct but the pg_duckdb extension is not installed");
}

if (isAssignment) {
elog(ERROR, "Assignment to duckdb.struct is not supported");
}

if (indirection == NIL) {
elog(ERROR, "Subscripting duckdb.struct with an empty subscript is not supported");
}

bool first = true;

// Transform each subscript expression
foreach_node(A_Indices, subscript, indirection) {
/* The first subscript needs to be a TEXT constant, since it should be
* a column reference. But the subscripts after that can be anything,
* DuckDB should interpret those. */
if (first) {
sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, CoerceStructSubscriptToText(pstate, subscript));
if (isSlice) {
sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, NULL);
}
first = false;
continue;
}

AddSubscriptExpressions(sbsref, pstate, subscript, isSlice);
}

// Set the result type of the subscripting operation
sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid();
sbsref->reftypmod = -1;
}

void
DuckdbStructSubscriptExecSetup(const SubscriptingRef * /*sbsref*/, SubscriptingRefState * /*sbsrefstate*/,
SubscriptExecSteps * /*exprstate*/) {
elog(ERROR, "Subscripting duckdb.struct is not supported in the Postgres Executor");
}

static SubscriptRoutines duckdb_struct_subscript_routines = {
.transform = DuckdbStructSubscriptTransform,
.exec_setup = DuckdbStructSubscriptExecSetup,
.fetch_strict = false,
.fetch_leakproof = true,
.store_leakproof = true,
};

DECLARE_PG_FUNCTION(duckdb_struct_subscript) {
PG_RETURN_POINTER(&duckdb_struct_subscript_routines);
}

/*
* DuckdbUnresolvedTypeSubscriptTransform is called by the parser when a
* subscripting operation is performed on a duckdb.unresolved_type. All this
Expand Down Expand Up @@ -597,6 +696,14 @@ DECLARE_PG_FUNCTION(duckdb_row_out) {
elog(ERROR, "Converting a duckdb.row to a string is not supported");
}

DECLARE_PG_FUNCTION(duckdb_struct_in) {
elog(ERROR, "Creating the duckdb.struct type is not supported");
}

DECLARE_PG_FUNCTION(duckdb_struct_out) {
return textout(fcinfo);
}

DECLARE_PG_FUNCTION(duckdb_unresolved_type_in) {
return textin(fcinfo);
}
Expand Down
29 changes: 27 additions & 2 deletions src/pgduckdb_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "pgduckdb/pgduckdb_types.hpp"
#include "pgduckdb/pgduckdb_metadata_cache.hpp"
#include "pgduckdb/pgduckdb_utils.hpp"
#include "pgduckdb/pgduckdb_metadata_cache.hpp"
#include "pgduckdb/scan/postgres_scan.hpp"
#include "pgduckdb/pg/types.hpp"

Expand Down Expand Up @@ -493,6 +494,20 @@ ConvertUUIDDatum(const duckdb::Value &value) {
return UUIDPGetDatum(postgres_uuid);
}

inline Datum
ConvertDuckStructDatum(const duckdb::Value &value) {
// similar to varchar and union
D_ASSERT(value.type().id() == duckdb::LogicalTypeId::STRUCT);
auto str = value.ToString();
auto varchar = str.c_str();
auto varchar_len = str.size();

text *result = (text *)palloc0(varchar_len + VARHDRSZ);
SET_VARSIZE(result, varchar_len + VARHDRSZ);
memcpy(VARDATA(result), varchar, varchar_len);
return PointerGetDatum(result);
}

static Datum
ConvertUnionDatum(const duckdb::Value &value) {
D_ASSERT(value.type().id() == duckdb::LogicalTypeId::UNION);
Expand Down Expand Up @@ -1109,8 +1124,13 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
ConvertDuckToPostgresArray<ByteArray>(slot, value, col);
break;
}
default:
if (oid == pgduckdb::DuckdbUnionOid()) {
default: {
// Since DuckdbRowOid is calculated at runtime, it is not possible to compile the
// code while placing it as a separate case in the switch-case clause above
if (oid == pgduckdb::DuckdbStructOid()) {
slot->tts_values[col] = ConvertDuckStructDatum(value);
return true;
} else if (oid == pgduckdb::DuckdbUnionOid()) {
slot->tts_values[col] = ConvertUnionDatum(value);
return true;
} else if (oid == pgduckdb::DuckdbMapOid()) {
Expand All @@ -1120,6 +1140,7 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
elog(WARNING, "(PGDuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid);
return false;
}
}
return true;
}

Expand Down Expand Up @@ -1216,6 +1237,8 @@ ConvertPostgresToBaseDuckColumnType(Form_pg_attribute &attribute) {
default:
if (typoid == pgduckdb::DuckdbUnionOid()) {
return duckdb::LogicalTypeId::UNION;
} else if (typoid == pgduckdb::DuckdbStructOid()) {
return duckdb::LogicalTypeId::STRUCT;
}
return duckdb::LogicalType::USER("UnsupportedPostgresType (Oid=" + std::to_string(attribute->atttypid) + ")");
}
Expand Down Expand Up @@ -1363,6 +1386,8 @@ GetPostgresDuckDBType(const duckdb::LogicalType &type) {
return UUIDOID;
case duckdb::LogicalTypeId::VARINT:
return NUMERICOID;
case duckdb::LogicalTypeId::STRUCT:
return pgduckdb::DuckdbStructOid();
case duckdb::LogicalTypeId::LIST:
case duckdb::LogicalTypeId::ARRAY: {
const duckdb::LogicalType *duck_type = &type;
Expand Down
19 changes: 13 additions & 6 deletions test/regression/expected/json_functions_duckdb.out
Original file line number Diff line number Diff line change
Expand Up @@ -338,11 +338,19 @@ SELECT public.json_group_structure(j) FROM example2;
-- ('{"family": "canidae", "species": ["labrador", "bulldog"], "hair": true}');
-- -- <JSON_TRANSFORM>
SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') FROM example2;
WARNING: (PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: STRUCT("family" VARCHAR, coolness DOUBLE) to Postgres type
ERROR: (PGDuckDB/CreatePlan) Cache lookup failed for type 0
json_transform
-----------------------------------------
{'family': anatidae, 'coolness': 42.42}
{'family': canidae, 'coolness': NULL}
(2 rows)

SELECT public.json_transform(j, '{"family": "TINYINT", "coolness": "DECIMAL(4, 2)"}') FROM example2;
WARNING: (PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: STRUCT("family" TINYINT, coolness DECIMAL(4,2)) to Postgres type
ERROR: (PGDuckDB/CreatePlan) Cache lookup failed for type 0
json_transform
-------------------------------------
{'family': NULL, 'coolness': 42.42}
{'family': NULL, 'coolness': NULL}
(2 rows)

SELECT res['family'] family, res['coolness'] coolness FROM (
SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') res FROM example2
) q;
Expand Down Expand Up @@ -373,8 +381,7 @@ SELECT res['family'] family, res['coolness'] coolness FROM (
-- -- </JSON_TRANSFORM>
-- -- <JSON_TRANSFORM_STRICT>
SELECT public.json_transform_strict(j, '{"family": "TINYINT", "coolness": "DOUBLE"}') FROM example2;
WARNING: (PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: STRUCT("family" TINYINT, coolness DOUBLE) to Postgres type
ERROR: (PGDuckDB/CreatePlan) Cache lookup failed for type 0
ERROR: (PGDuckDB/Duckdb_ExecCustomScan_Cpp) Invalid Input Error: Failed to cast value to numerical: "canidae"
SELECT res['family'] family FROM (
SELECT public.json_transform_strict(j, '{"family": "VARCHAR"}') res FROM example2
) q;
Expand Down