Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions c/driver/postgresql/statement.cc
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ struct BindStream {
type_id = PostgresTypeId::kBytea;
param_lengths[i] = 0;
break;
case ArrowType::NANOARROW_TYPE_TIMESTAMP:
type_id = PostgresTypeId::kTimestamp;
param_lengths[i] = 8;
break;
default:
SetError(error, "%s%" PRIu64 "%s%s%s%s", "[libpq] Field #",
static_cast<uint64_t>(i + 1), " ('", bind_schema->children[i]->name,
Expand Down Expand Up @@ -337,6 +341,31 @@ struct BindStream {
param_values[col] = const_cast<char*>(view.data.as_char);
break;
}
case ArrowType::NANOARROW_TYPE_TIMESTAMP: {
int64_t val = array_view->children[col]->buffer_views[1].data.as_int64[row];
auto unit = bind_schema_fields[col].time_unit;

// TODO: maybe upstream to nanoarrow as ArrowTimeUnitGetMultiplier
switch (unit) {
case NANOARROW_TIME_UNIT_SECOND:
val *= 1000000;
break;
case NANOARROW_TIME_UNIT_MILLI:
val *= 1000;
break;
case NANOARROW_TIME_UNIT_MICRO:
break;
case NANOARROW_TIME_UNIT_NANO:
val /= 1000;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should handle truncation/overflow here

break;
}

// 2000-01-01 00:00:00.000000 in microseconds
constexpr int64_t kPostgresTimestampEpoch = 946684800000000;
const uint64_t value = ToNetworkInt64(val - kPostgresTimestampEpoch);
std::memcpy(param_values[col], &value, sizeof(int64_t));
break;
}
default:
SetError(error, "%s%" PRId64 "%s%s%s%s", "[libpq] Field #", col + 1, " ('",
bind_schema->children[col]->name,
Expand Down Expand Up @@ -605,6 +634,9 @@ AdbcStatusCode PostgresStatement::CreateBulkTable(
case ArrowType::NANOARROW_TYPE_BINARY:
create += " BYTEA";
break;
case ArrowType::NANOARROW_TYPE_TIMESTAMP:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably differentiate between WITH/WITHOUT TIMEZONE?

note that in WITH TIMEZONE, Arrow always stores the underlying value in UTC so there's no need for us to do any time zone math (thankfully!)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I mistakenly assumed with TIMEZONE was a different type. For this PR planning to just raise if a timezone is detected, as I'm not yet sure how to transmit that information via the binary protocol

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we need to check for timezone here, too?

create += " TIMESTAMP";
break;
default:
SetError(error, "%s%" PRIu64 "%s%s%s%s", "[libpq] Field #",
static_cast<uint64_t>(i + 1), " ('", source_schema.children[i]->name,
Expand Down
3 changes: 3 additions & 0 deletions c/driver/sqlite/sqlite_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@ class SqliteStatementTest : public ::testing::Test,

void TestSqlIngestUInt64() { GTEST_SKIP() << "Cannot ingest UINT64 (out of range)"; }
void TestSqlIngestBinary() { GTEST_SKIP() << "Cannot ingest BINARY (not implemented)"; }
void TestSqlIngestTimestamp() {
GTEST_SKIP() << "Cannot ingest TIMESTAMP (not implemented)";
}

protected:
SqliteQuirks quirks_;
Expand Down
4 changes: 4 additions & 0 deletions c/validation/adbc_validation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1094,6 +1094,10 @@ void StatementTest::TestSqlIngestBinary() {
NANOARROW_TYPE_BINARY, {std::nullopt, "", "\x00\x01\x02\x04", "\xFE\xFF"}));
}

void StatementTest::TestSqlIngestTimestamp() {
ASSERT_NO_FATAL_FAILURE(TestSqlIngestType<int64_t>(NANOARROW_TYPE_TIMESTAMP, {0, 42}));
}

void StatementTest::TestSqlIngestAppend() {
if (!quirks()->supports_bulk_ingest()) {
GTEST_SKIP();
Expand Down
4 changes: 4 additions & 0 deletions c/validation/adbc_validation.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ class StatementTest {
void TestSqlIngestString();
void TestSqlIngestBinary();

// Temporal
void TestSqlIngestTimestamp();

// ---- End Type-specific tests ----------------

void TestSqlIngestAppend();
Expand Down Expand Up @@ -288,6 +291,7 @@ class StatementTest {
TEST_F(FIXTURE, SqlIngestFloat64) { TestSqlIngestFloat64(); } \
TEST_F(FIXTURE, SqlIngestString) { TestSqlIngestString(); } \
TEST_F(FIXTURE, SqlIngestBinary) { TestSqlIngestBinary(); } \
TEST_F(FIXTURE, SqlIngestTimestamp) { TestSqlIngestTimestamp(); } \
TEST_F(FIXTURE, SqlIngestAppend) { TestSqlIngestAppend(); } \
TEST_F(FIXTURE, SqlIngestErrors) { TestSqlIngestErrors(); } \
TEST_F(FIXTURE, SqlIngestMultipleConnections) { TestSqlIngestMultipleConnections(); } \
Expand Down
11 changes: 10 additions & 1 deletion c/validation/adbc_validation_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,16 @@ int MakeSchema(struct ArrowSchema* schema, const std::vector<SchemaField>& field
CHECK_ERRNO(ArrowSchemaSetTypeStruct(schema, fields.size()));
size_t i = 0;
for (const SchemaField& field : fields) {
CHECK_ERRNO(ArrowSchemaSetType(schema->children[i], field.type));
switch (field.type) {
case NANOARROW_TYPE_TIMESTAMP:
// TODO: don't hardcode unit here
CHECK_ERRNO(ArrowSchemaSetTypeDateTime(schema->children[i], field.type,
NANOARROW_TIME_UNIT_MICRO,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SchemaFields are an invention of the adbc_validation_util header - is there a reason we use this custom struct instead of an ArrowSchemaView? I'm not sure if its worth adding time_unit to the former or porting over to the latter in the test suite

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I originally just wanted something that could be easily stack allocated, versus a nested heap allocated structure. I was also struggling with how to deal with validation of more complex types. Especially in the validation code where you may not always want full exact matches (maybe you don't care about the field name, etc.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is OK for now but I think what I might end up doing is writing up a set of Googletest matchers so we can write out code like

ASSERT_THAT(view->children[0], IsTimestamp(/*any unit*/));
ASSERT_THAT(view->children[0], IsTimestamp(NANOARROW_TIME_UNIT_MICRO));

or something like that

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, for here...you could also just write out the code to build the schema by hand? This was meant as a convenience to cut down on boilerplate for simple schemas. But it was from a very early nanoarrow version, and nowadays I'm not sure how much boilerplate it actually saves compared to just writing it out.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense for matching, though I think we still have a gap with that when creating test data in non-default precisions that needs to be solved

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah OK cool - will try that

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, sorry, I realized I had gotten the context completely wrong once I clicked through and started reviewing

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this change still needed?

/*timezone=*/nullptr));
break;
default:
CHECK_ERRNO(ArrowSchemaSetType(schema->children[i], field.type));
}
CHECK_ERRNO(ArrowSchemaSetName(schema->children[i], field.name.c_str()));
if (!field.nullable) {
schema->children[i]->flags &= ~ARROW_FLAG_NULLABLE;
Expand Down