Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/pgduckdb/pgduckdb_metadata_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ uint64_t CacheVersion();
Oid ExtensionOid();
Oid SchemaOid();
Oid DuckdbRowOid();
Oid DuckdbStructOid();
Oid DuckdbUnresolvedTypeOid();
Oid DuckdbUnionOid();
Oid DuckdbMapOid();
Expand Down
49 changes: 48 additions & 1 deletion sql/pg_duckdb--0.3.0--0.4.0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,53 @@ DROP FUNCTION duckdb.cache_info;
DROP FUNCTION duckdb.cache;
DROP TYPE duckdb.cache_info;

-- New Data type to handle duckdb struct
CREATE TYPE duckdb.struct;
CREATE FUNCTION duckdb.struct_in(cstring) RETURNS duckdb.struct AS 'MODULE_PATHNAME', 'duckdb_struct_in' LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION duckdb.struct_out(duckdb.struct) RETURNS cstring AS 'MODULE_PATHNAME', 'duckdb_struct_out' LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION duckdb.struct_subscript(internal) RETURNS internal AS 'MODULE_PATHNAME', 'duckdb_struct_subscript' LANGUAGE C IMMUTABLE STRICT;
CREATE TYPE duckdb.struct (
INTERNALLENGTH = VARIABLE,
INPUT = duckdb.struct_in,
OUTPUT = duckdb.struct_out,
SUBSCRIPT = duckdb.struct_subscript
);

-- Update JSON functions that return STRUCT to actually return the struct type.
-- To do so we need to drop + create them.
DROP FUNCTION @extschema@.json_transform("json" duckdb.json, structure duckdb.json);
DROP FUNCTION @extschema@.from_json("json" duckdb.json, structure duckdb.json);
DROP FUNCTION @extschema@.json_transform_strict("json" duckdb.json, structure duckdb.json);
DROP FUNCTION @extschema@.from_json_strict("json" duckdb.json, structure duckdb.json);

-- json_transform
CREATE FUNCTION @extschema@.json_transform("json" duckdb.json, structure duckdb.json)
RETURNS duckdb.struct
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- from_json
CREATE FUNCTION @extschema@.from_json("json" duckdb.json, structure duckdb.json)
RETURNS duckdb.struct
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_transform_strict
CREATE FUNCTION @extschema@.json_transform_strict("json" duckdb.json, structure duckdb.json)
RETURNS duckdb.struct
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- from_json_strict
CREATE FUNCTION @extschema@.from_json_strict("json" duckdb.json, structure duckdb.json)
RETURNS duckdb.struct
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

DROP FUNCTION duckdb.install_extension(TEXT);
CREATE FUNCTION duckdb.install_extension(extension_name TEXT, source TEXT DEFAULT 'core') RETURNS void
SET search_path = pg_catalog, pg_temp
Expand Down Expand Up @@ -521,4 +568,4 @@ CREATE TYPE duckdb.map(
INTERNALLENGTH = VARIABLE,
INPUT = duckdb.map_in,
OUTPUT = duckdb.map_out
);
);
9 changes: 9 additions & 0 deletions src/pgduckdb_metadata_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ struct {
Oid schema_oid;
/* The OID of the duckdb.row type */
Oid row_oid;
/* The OID of the duckdb.struct type */
Oid struct_oid;
/* The OID of the duckdb.unresolved_type */
Oid unresolved_type_oid;
/* The OID of the duckdb.union type */
Expand Down Expand Up @@ -247,6 +249,7 @@ IsExtensionRegistered() {

cache.schema_oid = get_namespace_oid("duckdb", false);
cache.row_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("row"), cache.schema_oid);
cache.struct_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("struct"), cache.schema_oid);
cache.unresolved_type_oid =
GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("unresolved_type"), cache.schema_oid);

Expand Down Expand Up @@ -316,6 +319,12 @@ DuckdbRowOid() {
return cache.row_oid;
}

Oid
DuckdbStructOid() {
Assert(cache.valid);
return cache.struct_oid;
}

Oid
DuckdbUnresolvedTypeOid() {
Assert(cache.valid);
Expand Down
73 changes: 61 additions & 12 deletions src/pgduckdb_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,40 +383,51 @@ DECLARE_PG_FUNCTION(pgduckdb_recycle_ddb) {
}

Node *
CoerceRowSubscriptToText(struct ParseState *pstate, A_Indices *subscript) {
CoerceSubscriptToText(struct ParseState *pstate, A_Indices *subscript, const char *type_name) {
if (!subscript->uidx) {
elog(ERROR, "Creating a slice out of duckdb.row is not supported");
elog(ERROR, "Creating a slice out of %s is not supported", type_name);
}

Node *subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind);
int expr_location = exprLocation(subscript->uidx);
Oid subscript_expr_type = exprType(subscript_expr);

if (subscript->lidx) {
elog(ERROR, "Creating a slice out of duckdb.row is not supported");
elog(ERROR, "Creating a slice out of %s is not supported", type_name);
}

Node *coerced_expr = coerce_to_target_type(pstate, subscript_expr, subscript_expr_type, TEXTOID, -1,
COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, expr_location);
if (!coerced_expr) {
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must have text type"),
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("%s subscript must have text type", type_name),
parser_errposition(pstate, expr_location)));
}

if (!IsA(subscript_expr, Const)) {
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must be a constant"),
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("%s subscript must be a constant", type_name),
parser_errposition(pstate, expr_location)));
}

Const *subscript_const = castNode(Const, subscript_expr);
if (subscript_const->constisnull) {
ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("duckdb.row subscript cannot be NULL"),
ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("%s subscript cannot be NULL", type_name),
parser_errposition(pstate, expr_location)));
}

return coerced_expr;
}

Node *
CoerceRowSubscriptToText(struct ParseState *pstate, A_Indices *subscript) {
return CoerceSubscriptToText(pstate, subscript, "duckdb.row");
}

// Cloned implementation from CoerceRowSubscriptToText
Node *
CoerceStructSubscriptToText(struct ParseState *pstate, A_Indices *subscript) {
return CoerceSubscriptToText(pstate, subscript, "duckdb.struct");
}

/*
* In Postgres all index operations in a row ar all slices or all plain
* index operations. If you mix them, all are converted to slices.
Expand Down Expand Up @@ -456,29 +467,29 @@ AddSubscriptExpressions(SubscriptingRef *sbsref, struct ParseState *pstate, A_In
}

/*
* DuckdbRowSubscriptTransform is called by the parser when a subscripting
* DuckdbSubscriptTransform is called by the parser when a subscripting
* operation is performed on a duckdb.row. It has two main puprposes:
* 1. Ensure that the row is being indexed using a string literal
* 2. Ensure that the return type of this index operation is duckdb.unresolved_type
*/
void
DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
bool isAssignment) {
DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
bool isAssignment, const char *type_name) {
/*
* We need to populate our cache for some of the code below. Normally this
* cache is populated at the start of our planner hook, but this function
* is being called from the parser.
*/
if (!pgduckdb::IsExtensionRegistered()) {
elog(ERROR, "BUG: Using duckdb.row but the pg_duckdb extension is not installed");
elog(ERROR, "BUG: Using %s but the pg_duckdb extension is not installed", type_name);
}

if (isAssignment) {
elog(ERROR, "Assignment to duckdb.row is not supported");
elog(ERROR, "Assignment to %s is not supported", type_name);
}

if (indirection == NIL) {
elog(ERROR, "Subscripting duckdb.row with an empty subscript is not supported");
elog(ERROR, "Subscripting %s with an empty subscript is not supported", type_name);
}

bool first = true;
Expand All @@ -505,6 +516,18 @@ DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct P
sbsref->reftypmod = -1;
}

void
DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
bool isAssignment) {
DuckdbSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.row");
}

void
DuckdbStructSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
bool isAssignment) {
DuckdbSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.struct");
}

/*
* DuckdbRowSubscriptExecSetup is called by the executor when a subscripting
* operation is performed on a duckdb.row. This should never happen, because
Expand All @@ -529,6 +552,24 @@ DECLARE_PG_FUNCTION(duckdb_row_subscript) {
PG_RETURN_POINTER(&duckdb_row_subscript_routines);
}

void
DuckdbStructSubscriptExecSetup(const SubscriptingRef * /*sbsref*/, SubscriptingRefState * /*sbsrefstate*/,
SubscriptExecSteps * /*exprstate*/) {
elog(ERROR, "Subscripting duckdb.struct is not supported in the Postgres Executor");
}

static SubscriptRoutines duckdb_struct_subscript_routines = {
.transform = DuckdbStructSubscriptTransform,
.exec_setup = DuckdbStructSubscriptExecSetup,
.fetch_strict = false,
.fetch_leakproof = true,
.store_leakproof = true,
};

DECLARE_PG_FUNCTION(duckdb_struct_subscript) {
PG_RETURN_POINTER(&duckdb_struct_subscript_routines);
}

/*
* DuckdbUnresolvedTypeSubscriptTransform is called by the parser when a
* subscripting operation is performed on a duckdb.unresolved_type. All this
Expand Down Expand Up @@ -597,6 +638,14 @@ DECLARE_PG_FUNCTION(duckdb_row_out) {
elog(ERROR, "Converting a duckdb.row to a string is not supported");
}

DECLARE_PG_FUNCTION(duckdb_struct_in) {
elog(ERROR, "Creating the duckdb.struct type is not supported");
}

DECLARE_PG_FUNCTION(duckdb_struct_out) {
return textout(fcinfo);
}

DECLARE_PG_FUNCTION(duckdb_unresolved_type_in) {
return textin(fcinfo);
}
Expand Down
22 changes: 20 additions & 2 deletions src/pgduckdb_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "pgduckdb/pgduckdb_types.hpp"
#include "pgduckdb/pgduckdb_metadata_cache.hpp"
#include "pgduckdb/pgduckdb_utils.hpp"
#include "pgduckdb/pgduckdb_metadata_cache.hpp"
#include "pgduckdb/scan/postgres_scan.hpp"
#include "pgduckdb/pg/types.hpp"

Expand Down Expand Up @@ -493,6 +494,12 @@ ConvertUUIDDatum(const duckdb::Value &value) {
return UUIDPGetDatum(postgres_uuid);
}

inline Datum
ConvertDuckStructDatum(const duckdb::Value &value) {
D_ASSERT(value.type().id() == duckdb::LogicalTypeId::STRUCT);
return ConvertToStringDatum(value);
}

static Datum
ConvertUnionDatum(const duckdb::Value &value) {
D_ASSERT(value.type().id() == duckdb::LogicalTypeId::UNION);
Expand Down Expand Up @@ -1109,8 +1116,14 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
ConvertDuckToPostgresArray<ByteArray>(slot, value, col);
break;
}
default:
if (oid == pgduckdb::DuckdbUnionOid()) {
default: {
// Since oids of the following types calculated at runtime, it is not
// possible to compile the code while placing it as a separate case
// in the switch-case clause above.
if (oid == pgduckdb::DuckdbStructOid()) {
slot->tts_values[col] = ConvertDuckStructDatum(value);
return true;
} else if (oid == pgduckdb::DuckdbUnionOid()) {
slot->tts_values[col] = ConvertUnionDatum(value);
return true;
} else if (oid == pgduckdb::DuckdbMapOid()) {
Expand All @@ -1120,6 +1133,7 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
elog(WARNING, "(PGDuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid);
return false;
}
}
return true;
}

Expand Down Expand Up @@ -1216,6 +1230,8 @@ ConvertPostgresToBaseDuckColumnType(Form_pg_attribute &attribute) {
default:
if (typoid == pgduckdb::DuckdbUnionOid()) {
return duckdb::LogicalTypeId::UNION;
} else if (typoid == pgduckdb::DuckdbStructOid()) {
return duckdb::LogicalTypeId::STRUCT;
}
return duckdb::LogicalType::USER("UnsupportedPostgresType (Oid=" + std::to_string(attribute->atttypid) + ")");
}
Expand Down Expand Up @@ -1363,6 +1379,8 @@ GetPostgresDuckDBType(const duckdb::LogicalType &type) {
return UUIDOID;
case duckdb::LogicalTypeId::VARINT:
return NUMERICOID;
case duckdb::LogicalTypeId::STRUCT:
return pgduckdb::DuckdbStructOid();
case duckdb::LogicalTypeId::LIST:
case duckdb::LogicalTypeId::ARRAY: {
const duckdb::LogicalType *duck_type = &type;
Expand Down
19 changes: 13 additions & 6 deletions test/regression/expected/json_functions_duckdb.out
Original file line number Diff line number Diff line change
Expand Up @@ -338,11 +338,19 @@ SELECT public.json_group_structure(j) FROM example2;
-- ('{"family": "canidae", "species": ["labrador", "bulldog"], "hair": true}');
-- -- <JSON_TRANSFORM>
SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') FROM example2;
WARNING: (PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: STRUCT("family" VARCHAR, coolness DOUBLE) to Postgres type
ERROR: (PGDuckDB/CreatePlan) Cache lookup failed for type 0
json_transform
-----------------------------------------
{'family': anatidae, 'coolness': 42.42}
{'family': canidae, 'coolness': NULL}
(2 rows)

SELECT public.json_transform(j, '{"family": "TINYINT", "coolness": "DECIMAL(4, 2)"}') FROM example2;
WARNING: (PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: STRUCT("family" TINYINT, coolness DECIMAL(4,2)) to Postgres type
ERROR: (PGDuckDB/CreatePlan) Cache lookup failed for type 0
json_transform
-------------------------------------
{'family': NULL, 'coolness': 42.42}
{'family': NULL, 'coolness': NULL}
(2 rows)

SELECT res['family'] family, res['coolness'] coolness FROM (
SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') res FROM example2
) q;
Expand Down Expand Up @@ -373,8 +381,7 @@ SELECT res['family'] family, res['coolness'] coolness FROM (
-- -- </JSON_TRANSFORM>
-- -- <JSON_TRANSFORM_STRICT>
SELECT public.json_transform_strict(j, '{"family": "TINYINT", "coolness": "DOUBLE"}') FROM example2;
WARNING: (PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: STRUCT("family" TINYINT, coolness DOUBLE) to Postgres type
ERROR: (PGDuckDB/CreatePlan) Cache lookup failed for type 0
ERROR: (PGDuckDB/Duckdb_ExecCustomScan_Cpp) Invalid Input Error: Failed to cast value to numerical: "canidae"
SELECT res['family'] family FROM (
SELECT public.json_transform_strict(j, '{"family": "VARCHAR"}') res FROM example2
) q;
Expand Down