From 49c7f0922a0ffad624fc38bb6e891550b3930acc Mon Sep 17 00:00:00 2001 From: Ali Ince Date: Tue, 17 Oct 2023 15:20:11 +0100 Subject: [PATCH 1/4] feat: initial configuration for cdc --- .../kafka/configuration/Neo4jConfiguration.kt | 46 ++--- .../Neo4jConfigurationDeclarations.kt | 11 +- .../kafka/configuration/helpers/Validators.kt | 38 ++++ .../neo4j-source-configuration.properties | 4 +- .../main/distributions/text/doc/LICENSES.txt | 35 ++++ .../main/distributions/text/doc/NOTICE.txt | 6 + pom.xml | 10 ++ .../connectors/kafka/sink/Neo4jConnector.kt | 5 +- .../kafka/sink/SinkConfiguration.kt | 7 +- source/LICENSES.txt | 35 ++++ source/NOTICE.txt | 6 + source/pom.xml | 13 ++ .../DeprecatedNeo4jSourceConfiguration.kt | 12 +- .../connectors/kafka/source/Neo4jConnector.kt | 5 +- .../kafka/source/Neo4jSourceService.kt | 46 +++-- .../kafka/source/SourceConfiguration.kt | 137 +++++++++++--- .../DeprecatedNeo4jSourceConfigurationTest.kt | 42 +++++ .../kafka/source/Neo4jConnectorTest.kt | 168 ++++++++++++------ .../kafka/source/Neo4jSourceTaskTest.kt | 4 +- .../kafka/source/SourceConfigurationTest.kt | 82 +++++---- .../source/Neo4jSourceConnectorTest.kt | 42 +++++ 21 files changed, 571 insertions(+), 183 deletions(-) create mode 100644 source/src/test/kotlin/org/neo4j/connectors/kafka/source/DeprecatedNeo4jSourceConfigurationTest.kt create mode 100644 source/src/test/kotlin/streams/kafka/connect/source/Neo4jSourceConnectorTest.kt diff --git a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfiguration.kt b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfiguration.kt index 58f262776..525f19555 100644 --- a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfiguration.kt +++ b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfiguration.kt @@ -24,10 +24,9 @@ import kotlin.time.Duration.Companion.milliseconds import kotlin.time.Duration.Companion.seconds import 
org.apache.kafka.common.config.AbstractConfig import org.apache.kafka.common.config.ConfigDef -import org.apache.kafka.common.config.ConfigValue -import org.apache.kafka.common.config.types.Password import org.apache.kafka.connect.errors.ConnectException import org.neo4j.connectors.kafka.configuration.helpers.ConfigUtils +import org.neo4j.connectors.kafka.configuration.helpers.Validators.validateNonEmptyIfVisible import org.neo4j.connectors.kafka.configuration.helpers.parseSimpleString import org.neo4j.driver.AccessMode import org.neo4j.driver.AuthToken @@ -279,34 +278,21 @@ open class Neo4jConfiguration(configDef: ConfigDef, originals: Map<*, *>, val ty } /** Perform validation on dependent configuration items */ - fun validate(config: org.apache.kafka.common.config.Config) { - val values = config.configValues() - - validateNonEmptyIfVisible(values, AUTHENTICATION_BASIC_USERNAME) - validateNonEmptyIfVisible(values, AUTHENTICATION_BASIC_PASSWORD) - validateNonEmptyIfVisible(values, AUTHENTICATION_KERBEROS_TICKET) - validateNonEmptyIfVisible(values, AUTHENTICATION_BEARER_TOKEN) - validateNonEmptyIfVisible(values, AUTHENTICATION_CUSTOM_PRINCIPAL) - validateNonEmptyIfVisible(values, AUTHENTICATION_CUSTOM_CREDENTIALS) - validateNonEmptyIfVisible(values, AUTHENTICATION_CUSTOM_SCHEME) - } - - protected fun validateNonEmptyIfVisible(values: MutableList, name: String) { - values - .first { it.name() == name } - .run { - if (this.visible() && - (when (val value = this.value()) { - is String? -> value - is Password? 
-> value?.value() - else -> - throw IllegalArgumentException( - "unexpected value '$value' for configuration $name") - }) - .isNullOrBlank()) { - this.addErrorMessage("Must be non-empty.") - } - } + fun validate(config: org.apache.kafka.common.config.Config, originals: Map) { + // authentication configuration + config.validateNonEmptyIfVisible(AUTHENTICATION_BASIC_USERNAME) + config.validateNonEmptyIfVisible(AUTHENTICATION_BASIC_PASSWORD) + config.validateNonEmptyIfVisible(AUTHENTICATION_KERBEROS_TICKET) + config.validateNonEmptyIfVisible(AUTHENTICATION_BEARER_TOKEN) + config.validateNonEmptyIfVisible(AUTHENTICATION_CUSTOM_PRINCIPAL) + config.validateNonEmptyIfVisible(AUTHENTICATION_CUSTOM_CREDENTIALS) + config.validateNonEmptyIfVisible(AUTHENTICATION_CUSTOM_SCHEME) + + // security configuration + config.validateNonEmptyIfVisible(SECURITY_ENCRYPTED) + config.validateNonEmptyIfVisible(SECURITY_HOST_NAME_VERIFICATION_ENABLED) + config.validateNonEmptyIfVisible(SECURITY_TRUST_STRATEGY) + config.validateNonEmptyIfVisible(SECURITY_CERT_FILES) } fun config(): ConfigDef = diff --git a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfigurationDeclarations.kt b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfigurationDeclarations.kt index 9ec843091..7fb488dce 100644 --- a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfigurationDeclarations.kt +++ b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfigurationDeclarations.kt @@ -78,6 +78,7 @@ fun ConfigDef.defineConnectionSettings(): ConfigDef = group = CONNECTION.title importance = Importance.HIGH defaultValue = "" + dependents = listOf(Neo4jConfiguration.AUTHENTICATION_TYPE) recommender = Recommenders.visibleIf( Neo4jConfiguration.AUTHENTICATION_TYPE, @@ -92,6 +93,7 @@ fun ConfigDef.defineConnectionSettings(): ConfigDef = group = CONNECTION.title importance = Importance.HIGH defaultValue = "" + dependents = 
listOf(Neo4jConfiguration.AUTHENTICATION_TYPE) recommender = Recommenders.visibleIf( Neo4jConfiguration.AUTHENTICATION_TYPE, @@ -106,6 +108,7 @@ fun ConfigDef.defineConnectionSettings(): ConfigDef = group = CONNECTION.title importance = Importance.HIGH defaultValue = "" + dependents = listOf(Neo4jConfiguration.AUTHENTICATION_TYPE) recommender = Recommenders.visibleIf( Neo4jConfiguration.AUTHENTICATION_TYPE, @@ -120,6 +123,7 @@ fun ConfigDef.defineConnectionSettings(): ConfigDef = group = CONNECTION.title importance = Importance.HIGH defaultValue = "" + dependents = listOf(Neo4jConfiguration.AUTHENTICATION_TYPE) recommender = Recommenders.visibleIf( Neo4jConfiguration.AUTHENTICATION_TYPE, @@ -134,6 +138,7 @@ fun ConfigDef.defineConnectionSettings(): ConfigDef = group = CONNECTION.title importance = Importance.HIGH defaultValue = "" + dependents = listOf(Neo4jConfiguration.AUTHENTICATION_TYPE) recommender = Recommenders.visibleIf( Neo4jConfiguration.AUTHENTICATION_TYPE, @@ -148,6 +153,7 @@ fun ConfigDef.defineConnectionSettings(): ConfigDef = group = CONNECTION.title importance = Importance.HIGH defaultValue = "" + dependents = listOf(Neo4jConfiguration.AUTHENTICATION_TYPE) recommender = Recommenders.visibleIf( Neo4jConfiguration.AUTHENTICATION_TYPE, @@ -163,6 +169,7 @@ fun ConfigDef.defineConnectionSettings(): ConfigDef = group = CONNECTION.title importance = Importance.HIGH defaultValue = "" + dependents = listOf(Neo4jConfiguration.AUTHENTICATION_TYPE) recommender = Recommenders.visibleIf( Neo4jConfiguration.AUTHENTICATION_TYPE, @@ -177,6 +184,7 @@ fun ConfigDef.defineConnectionSettings(): ConfigDef = group = CONNECTION.title importance = Importance.HIGH defaultValue = "" + dependents = listOf(Neo4jConfiguration.AUTHENTICATION_TYPE) recommender = Recommenders.visibleIf( Neo4jConfiguration.AUTHENTICATION_TYPE, @@ -191,6 +199,7 @@ fun ConfigDef.defineConnectionSettings(): ConfigDef = group = CONNECTION.title importance = Importance.HIGH defaultValue = "" + dependents 
= listOf(Neo4jConfiguration.AUTHENTICATION_TYPE) recommender = Recommenders.visibleIf( Neo4jConfiguration.AUTHENTICATION_TYPE, @@ -226,8 +235,8 @@ fun ConfigDef.defineEncryptionSettings(): ConfigDef = documentation = PropertiesUtil.getProperty(Neo4jConfiguration.SECURITY_TRUST_STRATEGY) group = ADVANCED.title importance = Importance.LOW - dependents = listOf(Neo4jConfiguration.URI, Neo4jConfiguration.SECURITY_ENCRYPTED) validator = Validators.enum(Strategy::class.java) + dependents = listOf(Neo4jConfiguration.URI, Neo4jConfiguration.SECURITY_ENCRYPTED) recommender = Recommenders.and( Recommenders.enum(Strategy::class.java), diff --git a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt index 36286cb53..a170712da 100644 --- a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt +++ b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt @@ -20,8 +20,10 @@ import java.io.File import java.net.URI import java.net.URISyntaxException import java.util.regex.Pattern +import org.apache.kafka.common.config.Config import org.apache.kafka.common.config.ConfigDef import org.apache.kafka.common.config.ConfigException +import org.apache.kafka.common.config.types.Password object Validators { @@ -64,6 +66,22 @@ object Validators { } } + fun notBlank(): ConfigDef.Validator { + return ConfigDef.Validator { name, value -> + if (value is String) { + if (value.isBlank()) { + throw ConfigException(name, value, "Must not be blank.") + } + } else if (value is List<*>) { + if (value.isEmpty()) { + throw ConfigException(name, value, "Must not be empty.") + } + } else { + throw ConfigException(name, value, "Must be a String or a List.") + } + } + } + fun string(vararg values: String): ConfigDef.Validator { return object : ConfigDef.Validator { override fun ensureValid(name: String?, value: Any?) 
{ @@ -176,4 +194,24 @@ object Validators { } } } + + fun Config.validateNonEmptyIfVisible(name: String) { + this.configValues() + .first { it.name() == name } + .let { config -> + if (config.visible() && + (when (val value = config.value()) { + is Int? -> value == null + is Boolean? -> value == null + is String? -> value.isNullOrEmpty() + is Password? -> value?.value().isNullOrEmpty() + is List<*>? -> value.isNullOrEmpty() + else -> + throw IllegalArgumentException( + "unexpected value '$value' for configuration $name") + })) { + config.addErrorMessage("Must be non-empty.") + } + } + } } diff --git a/common/src/main/resources/neo4j-source-configuration.properties b/common/src/main/resources/neo4j-source-configuration.properties index 510ad74f8..817998cb3 100644 --- a/common/src/main/resources/neo4j-source-configuration.properties +++ b/common/src/main/resources/neo4j-source-configuration.properties @@ -13,7 +13,8 @@ # limitations under the License. ## ## Connection Properties -neo4j.stream-from=Type: Enum;\nDescription: A time anchor to start streaming from. +neo4j.start-from=Type: Enum;\nDescription: A time anchor to start streaming from. +neo4j.start-from.value=Type: STRING|LONG;\nDescription: Custom value to use as a starting offset. Used once during the initial run of the connector, and will be ignored if there is an offset stored in Kafka Connect. neo4j.source-strategy=Type: Enum;\nDescription: Source strategy for this connector. neo4j.query=Type: String;\nDescription: Cypher query to gather changes. Requires both `neo4j.query.streaming-property` to be in the result set, and `$lastCheck` query parameter for tracking changes. neo4j.query.streaming-property=Type: String;\nDescription: Property name that is both present in the result set of the specified query and used as a filter to query changes from a previous value. @@ -22,3 +23,4 @@ topic=Type: String;\nDescription: Kafka topic to push gathered change messages. 
neo4j.enforce-schema=Type: Boolean;\nDescription: Whether to attach schema to produced change messages. neo4j.query.batch-size=Type: Integer;\nDescription: Max number of messages pushed for each poll cycle. neo4j.query.timeout=Type: Duration;\nDescription: Maximum amount of time source query is allowed to run. +neo4j.cdc.poll-interval=Type: Duration;\nDescription: The interval at which the database will be queried for change data. \ No newline at end of file diff --git a/packaging/src/main/distributions/text/doc/LICENSES.txt b/packaging/src/main/distributions/text/doc/LICENSES.txt index dea585f73..ebef93530 100644 --- a/packaging/src/main/distributions/text/doc/LICENSES.txt +++ b/packaging/src/main/distributions/text/doc/LICENSES.txt @@ -4,7 +4,9 @@ libraries. For an overview of the licenses see the NOTICE.txt file. ------------------------------------------------------------------------------ Apache Software License, Version 2.0 + Apache Commons Collections Apache Commons Lang + cdc Jackson-annotations Jackson-core jackson-databind @@ -17,6 +19,7 @@ Apache Software License, Version 2.0 Kotlin Stdlib Jdk8 kotlinx-coroutines-core Neo4j Java Driver + Non-Blocking Reactive Foundation for the JVM ------------------------------------------------------------------------------ Apache License @@ -223,6 +226,38 @@ Apache Software License, Version 2.0 +------------------------------------------------------------------------------ +BSD License + ANTLR 4 Runtime +------------------------------------------------------------------------------ + +Copyright (c) , +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + + ------------------------------------------------------------------------------ MIT License SLF4J API Module diff --git a/packaging/src/main/distributions/text/doc/NOTICE.txt b/packaging/src/main/distributions/text/doc/NOTICE.txt index d6656abc6..9e5eba3de 100644 --- a/packaging/src/main/distributions/text/doc/NOTICE.txt +++ b/packaging/src/main/distributions/text/doc/NOTICE.txt @@ -19,7 +19,9 @@ Third-party licenses -------------------- Apache Software License, Version 2.0 + Apache Commons Collections Apache Commons Lang + cdc Jackson-annotations Jackson-core jackson-databind @@ -32,6 +34,10 @@ Apache Software License, Version 2.0 Kotlin Stdlib Jdk8 kotlinx-coroutines-core Neo4j Java Driver + Non-Blocking Reactive Foundation for the JVM + +BSD License + ANTLR 4 Runtime MIT License SLF4J API Module diff --git a/pom.xml b/pom.xml index f0e3d1a13..6ffc5a869 100644 --- a/pom.xml +++ b/pom.xml @@ -49,6 +49,7 @@ 5.10.0 5.2.2 2.6.3 + 5.6.2 1.7.3 1.9.0 4.2 @@ -124,6 +125,11 @@ kafka-avro-serializer ${kafka-avro-serializer.version} + + io.kotest + kotest-assertions-core-jvm + ${kotest-assertions-core-jvm.version} + org.apache.avro avro @@ -192,6 +198,10 @@ + + github + https://maven.pkg.github.com/neo4j/connectors-build-resources + confluent https://packages.confluent.io/maven/ diff --git a/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnector.kt b/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnector.kt index 671d59585..b079764e6 100644 --- a/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnector.kt +++ b/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnector.kt @@ -40,9 +40,10 @@ class Neo4jConnector : SinkConnector() { override fun config(): ConfigDef = SinkConfiguration.config() override fun validate(connectorConfigs: MutableMap?): Config { - val result = super.validate(connectorConfigs) + val originals = connectorConfigs ?: emptyMap() + val result = super.validate(originals) - 
SinkConfiguration.validate(result) + SinkConfiguration.validate(result, originals) return result } diff --git a/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/SinkConfiguration.kt b/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/SinkConfiguration.kt index b0b10df5c..51e12e95e 100644 --- a/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/SinkConfiguration.kt +++ b/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/SinkConfiguration.kt @@ -124,8 +124,11 @@ class SinkConfiguration(originals: Map<*, *>) : return migrated } - fun validate(config: org.apache.kafka.common.config.Config) { - Neo4jConfiguration.validate(config) + internal fun validate( + config: org.apache.kafka.common.config.Config, + originals: Map + ) { + Neo4jConfiguration.validate(config, originals) } fun config(): ConfigDef = diff --git a/source/LICENSES.txt b/source/LICENSES.txt index dea585f73..ebef93530 100644 --- a/source/LICENSES.txt +++ b/source/LICENSES.txt @@ -4,7 +4,9 @@ libraries. For an overview of the licenses see the NOTICE.txt file. ------------------------------------------------------------------------------ Apache Software License, Version 2.0 + Apache Commons Collections Apache Commons Lang + cdc Jackson-annotations Jackson-core jackson-databind @@ -17,6 +19,7 @@ Apache Software License, Version 2.0 Kotlin Stdlib Jdk8 kotlinx-coroutines-core Neo4j Java Driver + Non-Blocking Reactive Foundation for the JVM ------------------------------------------------------------------------------ Apache License @@ -223,6 +226,38 @@ Apache Software License, Version 2.0 +------------------------------------------------------------------------------ +BSD License + ANTLR 4 Runtime +------------------------------------------------------------------------------ + +Copyright (c) , +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + + ------------------------------------------------------------------------------ MIT License SLF4J API Module diff --git a/source/NOTICE.txt b/source/NOTICE.txt index d6656abc6..9e5eba3de 100644 --- a/source/NOTICE.txt +++ b/source/NOTICE.txt @@ -19,7 +19,9 @@ Third-party licenses -------------------- Apache Software License, Version 2.0 + Apache Commons Collections Apache Commons Lang + cdc Jackson-annotations Jackson-core jackson-databind @@ -32,6 +34,10 @@ Apache Software License, Version 2.0 Kotlin Stdlib Jdk8 kotlinx-coroutines-core Neo4j Java Driver + Non-Blocking Reactive Foundation for the JVM + +BSD License + ANTLR 4 Runtime MIT License SLF4J API Module diff --git a/source/pom.xml b/source/pom.xml index 91482fc5c..11d38933c 100644 --- a/source/pom.xml +++ b/source/pom.xml @@ -11,7 +11,15 @@ jar source Neo4j Connector for Kafka - Source + + 1.0.3 + + + org.neo4j.connectors + cdc + ${cdc.version} + org.neo4j.connectors.kafka common @@ -27,6 +35,11 @@ ${kafka.version} provided + + io.kotest + kotest-assertions-core-jvm + test + org.assertj assertj-core diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/DeprecatedNeo4jSourceConfiguration.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/DeprecatedNeo4jSourceConfiguration.kt index aa7d20f31..c7b5ba485 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/DeprecatedNeo4jSourceConfiguration.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/DeprecatedNeo4jSourceConfiguration.kt @@ -28,9 +28,15 @@ import org.neo4j.connectors.kafka.utils.PropertiesUtil class DeprecatedNeo4jSourceConfiguration(originals: Map<*, *>) : DeprecatedNeo4jConfiguration(config(), originals, ConnectorType.SOURCE) { + enum class StreamingFrom { + ALL, + NOW, + LAST_COMMITTED + } + companion object { const val TOPIC = "topic" - @Deprecated("deprecated in favour of ${SourceConfiguration.STREAM_FROM}") + @Deprecated("deprecated in favour of ${SourceConfiguration.START_FROM}") 
const val STREAMING_FROM = "neo4j.streaming.from" @Deprecated("deprecated in favour of ${SourceConfiguration.ENFORCE_SCHEMA}") const val ENFORCE_SCHEMA = "neo4j.enforce.schema" @@ -84,8 +90,8 @@ class DeprecatedNeo4jSourceConfiguration(originals: Map<*, *>) : documentation = PropertiesUtil.getProperty(SOURCE_TYPE) importance = ConfigDef.Importance.HIGH defaultValue = SourceType.QUERY.toString() - validator = Validators.enum(SourceType::class.java) - recommender = Recommenders.enum(SourceType::class.java) + validator = Validators.enum(SourceType::class.java, SourceType.CDC) + recommender = Recommenders.enum(SourceType::class.java, SourceType.CDC) }) .define( ConfigKeyBuilder.of(SOURCE_TYPE_QUERY, ConfigDef.Type.STRING) { diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt index 44b550a07..544dc1f77 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt @@ -40,9 +40,10 @@ class Neo4jConnector : SourceConnector() { override fun config(): ConfigDef = SourceConfiguration.config() override fun validate(connectorConfigs: MutableMap?): Config { - val result = super.validate(connectorConfigs) + val originals = connectorConfigs ?: emptyMap() + val result = super.validate(originals) - SourceConfiguration.validate(result) + SourceConfiguration.validate(result, originals) return result } diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceService.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceService.kt index 1c9da8ff6..a416cc13d 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceService.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceService.kt @@ -54,25 +54,33 @@ class Neo4jSourceService( private val isClose = AtomicBoolean() - private val 
lastCheck: AtomicLong by lazy { + private val currentOffset: AtomicLong by lazy { val offset = offsetStorageReader.offset(sourcePartition) ?: emptyMap() - // if the user wants to recover from LAST_COMMITTED + val startValue = - if (config.streamFrom == StreamingFrom.LAST_COMMITTED && - offset["value"] != null && - offset["property"] == config.queryStreamingProperty) { + if (offset["value"] != null && offset["property"] == config.queryStreamingProperty) { log.info( - "Resuming offset $offset, the ${SourceConfiguration.STREAM_FROM} value is ignored") + "Resuming from offset $offset, '${config.startFrom}' specified for configuration '${SourceConfiguration.START_FROM}' is ignored.") offset["value"] as Long } else { - if (config.streamFrom == StreamingFrom.LAST_COMMITTED) { - log.info( - "You provided ${SourceConfiguration.STREAM_FROM}: ${config.streamFrom} but no offset has been found, we'll start to consume from NOW") - } else { - log.info( - "No offset to resume, we'll use the provided value of ${SourceConfiguration.STREAM_FROM}: ${config.streamFrom}") + when (config.startFrom) { + StartFrom.EARLIEST -> { + log.info( + "No offset has been found and '${config.startFrom}' for configuration '${SourceConfiguration.START_FROM}' will be used.") + (-1) + } + StartFrom.NOW -> { + log.info( + "No offset has been found and '${config.startFrom}' for configuration '${SourceConfiguration.START_FROM}' will be used.") + System.currentTimeMillis() + } + StartFrom.USER_PROVIDED -> { + val provided = config.startFromCustom.toLong() + log.info( + "No offset has been found and '${config.startFrom}' for configuration '${SourceConfiguration.START_FROM}' will be used with a starting offset value '${provided}'.") + provided + } } - config.streamFrom.value() } AtomicLong(startValue) } @@ -92,15 +100,16 @@ class Neo4jSourceService( if (!isStreamingPropertyDefined) { // we update the lastCheck property only if the last loop round // returned results otherwise we stick to the old value + // 
TODO: Not sure what this does exactly if (lastCheckHadResult) { - lastCheck.set(System.currentTimeMillis() - pollInterval) + currentOffset.set(System.currentTimeMillis() - pollInterval) } } config .session() .readTransaction( { tx -> - val result = tx.run(config.query, mapOf("lastCheck" to lastCheck.get())) + val result = tx.run(config.query, mapOf("lastCheck" to currentOffset.get())) lastCheckHadResult = result.hasNext() result.forEach { record -> try { @@ -135,7 +144,7 @@ class Neo4jSourceService( try { if (isStreamingPropertyDefined) { val value = record.get(config.queryStreamingProperty, Values.value(-1L)).asLong() - lastCheck.getAndUpdate { oldValue -> + currentOffset.getAndUpdate { oldValue -> if (oldValue >= value) { oldValue } else { @@ -144,10 +153,11 @@ class Neo4jSourceService( } value } else { - lastCheck.get() + currentOffset.get() } } catch (e: Throwable) { - lastCheck.get() + // TODO: should we not log an error here? + currentOffset.get() } private fun checkError() { diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt index 89e93d059..381e857cd 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt @@ -22,6 +22,9 @@ import kotlin.time.Duration.Companion.seconds import kotlin.time.toJavaDuration import org.apache.kafka.common.config.ConfigDef import org.apache.kafka.common.config.ConfigDef.Range +import org.apache.kafka.common.config.ConfigException +import org.neo4j.cdc.client.pattern.Pattern +import org.neo4j.cdc.client.pattern.PatternException import org.neo4j.connectors.kafka.configuration.ConnectorType import org.neo4j.connectors.kafka.configuration.DeprecatedNeo4jConfiguration import org.neo4j.connectors.kafka.configuration.Neo4jConfiguration @@ -29,6 +32,7 @@ import 
org.neo4j.connectors.kafka.configuration.helpers.ConfigKeyBuilder import org.neo4j.connectors.kafka.configuration.helpers.Recommenders import org.neo4j.connectors.kafka.configuration.helpers.SIMPLE_DURATION_PATTERN import org.neo4j.connectors.kafka.configuration.helpers.Validators +import org.neo4j.connectors.kafka.configuration.helpers.Validators.validateNonEmptyIfVisible import org.neo4j.connectors.kafka.configuration.helpers.parseSimpleString import org.neo4j.connectors.kafka.configuration.helpers.toSimpleString import org.neo4j.connectors.kafka.source.DeprecatedNeo4jSourceConfiguration.Companion.ENFORCE_SCHEMA @@ -38,25 +42,23 @@ import org.neo4j.driver.TransactionConfig enum class SourceType { QUERY, + CDC } -enum class StreamingFrom { - ALL, +enum class StartFrom { + EARLIEST, NOW, - LAST_COMMITTED; - - fun value() = - when (this) { - ALL -> -1 - else -> System.currentTimeMillis() - } + USER_PROVIDED } class SourceConfiguration(originals: Map<*, *>) : Neo4jConfiguration(config(), originals, ConnectorType.SOURCE) { - val streamFrom - get(): StreamingFrom = StreamingFrom.valueOf(getString(STREAM_FROM)) + val startFrom + get(): StartFrom = StartFrom.valueOf(getString(START_FROM)) + + val startFromCustom + get(): String = getString(START_FROM_VALUE) val enforceSchema get(): Boolean = getBoolean(ENFORCE_SCHEMA) @@ -88,6 +90,7 @@ class SourceConfiguration(originals: Map<*, *>) : SourceType.QUERY -> mapOf( "database" to this.database, "type" to "query", "query" to query, "partition" to 1) + SourceType.CDC -> mapOf("database" to this.database, "type" to "cdc", "partition" to 1) } } @@ -105,7 +108,8 @@ class SourceConfiguration(originals: Map<*, *>) : } companion object { - const val STREAM_FROM = "neo4j.stream-from" + const val START_FROM = "neo4j.start-from" + const val START_FROM_VALUE = "neo4j.start-from.value" const val STRATEGY = "neo4j.source-strategy" const val QUERY = "neo4j.query" const val QUERY_STREAMING_PROPERTY = "neo4j.query.streaming-property" @@ 
-114,10 +118,14 @@ class SourceConfiguration(originals: Map<*, *>) : const val QUERY_TIMEOUT = "neo4j.query.timeout" const val TOPIC = "topic" const val ENFORCE_SCHEMA = "neo4j.enforce-schema" + const val CDC_POLL_INTERVAL = "neo4j.cdc.poll-interval" + private val CDC_PATTERNS_REGEX = + Regex("^neo4j\\.cdc\\.topic\\.([a-zA-Z0-9._-]+)(\\.patterns)?$") private val DEFAULT_POLL_INTERVAL = 10.seconds private const val DEFAULT_QUERY_BATCH_SIZE = 1000 private val DEFAULT_QUERY_TIMEOUT = 0.seconds + private val DEFAULT_CDC_POLL_INTERVAL = 10.seconds fun migrateSettings(oldSettings: Map): Map { val migrated = Neo4jConfiguration.migrateSettings(oldSettings, true).toMutableMap() @@ -125,7 +133,14 @@ class SourceConfiguration(originals: Map<*, *>) : oldSettings.forEach { when (it.key) { DeprecatedNeo4jSourceConfiguration.STREAMING_FROM -> - migrated[STREAM_FROM] = it.value.toString() + migrated[START_FROM] = + when (DeprecatedNeo4jSourceConfiguration.StreamingFrom.valueOf( + it.value.toString())) { + DeprecatedNeo4jSourceConfiguration.StreamingFrom.ALL -> StartFrom.EARLIEST.name + DeprecatedNeo4jSourceConfiguration.StreamingFrom.NOW -> StartFrom.NOW.name + DeprecatedNeo4jSourceConfiguration.StreamingFrom.LAST_COMMITTED -> + StartFrom.NOW.name + } DeprecatedNeo4jSourceConfiguration.SOURCE_TYPE -> migrated[STRATEGY] = it.value.toString() DeprecatedNeo4jSourceConfiguration.SOURCE_TYPE_QUERY -> migrated[QUERY] = it.value.toString() @@ -150,20 +165,53 @@ class SourceConfiguration(originals: Map<*, *>) : return migrated } - fun validate(config: org.apache.kafka.common.config.Config) { - Neo4jConfiguration.validate(config) + internal fun validate( + config: org.apache.kafka.common.config.Config, + originals: Map + ) { + Neo4jConfiguration.validate(config, originals) + + // START_FROM user defined validation + config.validateNonEmptyIfVisible(START_FROM_VALUE) + + // QUERY strategy validation + config.validateNonEmptyIfVisible(TOPIC) + config.validateNonEmptyIfVisible(QUERY) + 
config.validateNonEmptyIfVisible(QUERY_TIMEOUT) + config.validateNonEmptyIfVisible(QUERY_POLL_INTERVAL) + config.validateNonEmptyIfVisible(QUERY_BATCH_SIZE) + + // CDC validation + config.validateNonEmptyIfVisible(CDC_POLL_INTERVAL) + + val configList = config.configValues().toList() + val strategy = configList.find { it.name() == STRATEGY } + if (strategy?.value() == SourceType.CDC.name) { + val cdcTopics = originals.entries.filter { CDC_PATTERNS_REGEX.matches(it.key) } + if (cdcTopics.isEmpty() || cdcTopics.size > 1) { + strategy.addErrorMessage( + "Exactly one topic needs to be configured with pattern(s) describing the entities to query changes for. Please refer to documentation for more information.") + } else { + cdcTopics.forEach { + // parse & validate CDC patterns + try { + Validators.notBlank().ensureValid(it.key, it.value) + + try { + Pattern.parse(it.value as String?) + } catch (e: PatternException) { + throw ConfigException(it.key, it.value, e.message) + } + } catch (e: ConfigException) { + strategy.addErrorMessage(e.message) + } + } + } + } } fun config(): ConfigDef = Neo4jConfiguration.config() - .define( - ConfigKeyBuilder.of(STREAM_FROM, ConfigDef.Type.STRING) { - documentation = PropertiesUtil.getProperty(STREAM_FROM) - importance = ConfigDef.Importance.HIGH - defaultValue = StreamingFrom.NOW.toString() - validator = Validators.enum(StreamingFrom::class.java) - recommender = Recommenders.enum(StreamingFrom::class.java) - }) .define( ConfigKeyBuilder.of(STRATEGY, ConfigDef.Type.STRING) { documentation = PropertiesUtil.getProperty(STRATEGY) @@ -172,6 +220,33 @@ class SourceConfiguration(originals: Map<*, *>) : validator = Validators.enum(SourceType::class.java) recommender = Recommenders.enum(SourceType::class.java) }) + .define( + ConfigKeyBuilder.of(START_FROM, ConfigDef.Type.STRING) { + documentation = PropertiesUtil.getProperty(START_FROM) + importance = ConfigDef.Importance.HIGH + defaultValue = StartFrom.NOW.toString() + validator = 
Validators.enum(StartFrom::class.java) + recommender = Recommenders.enum(StartFrom::class.java) + }) + .define( + ConfigKeyBuilder.of(START_FROM_VALUE, ConfigDef.Type.STRING) { + documentation = PropertiesUtil.getProperty(START_FROM_VALUE) + importance = ConfigDef.Importance.HIGH + defaultValue = "" + dependents = listOf(START_FROM) + recommender = + Recommenders.visibleIf( + START_FROM, Predicate.isEqual(StartFrom.USER_PROVIDED.name)) + }) + .define( + ConfigKeyBuilder.of(TOPIC, ConfigDef.Type.STRING) { + documentation = PropertiesUtil.getProperty(TOPIC) + importance = ConfigDef.Importance.HIGH + validator = ConfigDef.NonEmptyString() + dependents = listOf(STRATEGY) + recommender = + Recommenders.visibleIf(STRATEGY, Predicate.isEqual(SourceType.QUERY.name)) + }) .define( ConfigKeyBuilder.of(QUERY, ConfigDef.Type.STRING) { documentation = PropertiesUtil.getProperty(QUERY) @@ -219,6 +294,16 @@ class SourceConfiguration(originals: Map<*, *>) : validator = Validators.pattern(SIMPLE_DURATION_PATTERN) defaultValue = DEFAULT_QUERY_TIMEOUT.toSimpleString() }) + .define( + ConfigKeyBuilder.of(CDC_POLL_INTERVAL, ConfigDef.Type.STRING) { + documentation = PropertiesUtil.getProperty(CDC_POLL_INTERVAL) + importance = ConfigDef.Importance.HIGH + dependents = listOf(STRATEGY) + recommender = + Recommenders.visibleIf(STRATEGY, Predicate.isEqual(SourceType.CDC.name)) + validator = Validators.pattern(SIMPLE_DURATION_PATTERN) + defaultValue = DEFAULT_CDC_POLL_INTERVAL.toSimpleString() + }) .define( ConfigKeyBuilder.of(ENFORCE_SCHEMA, ConfigDef.Type.BOOLEAN) { documentation = PropertiesUtil.getProperty(ENFORCE_SCHEMA) @@ -226,11 +311,5 @@ class SourceConfiguration(originals: Map<*, *>) : defaultValue = false validator = ConfigDef.NonNullValidator() }) - .define( - ConfigKeyBuilder.of(TOPIC, ConfigDef.Type.STRING) { - documentation = PropertiesUtil.getProperty(TOPIC) - importance = ConfigDef.Importance.HIGH - validator = ConfigDef.NonEmptyString() - }) } } diff --git 
a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/DeprecatedNeo4jSourceConfigurationTest.kt b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/DeprecatedNeo4jSourceConfigurationTest.kt new file mode 100644 index 000000000..84fba5d90 --- /dev/null +++ b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/DeprecatedNeo4jSourceConfigurationTest.kt @@ -0,0 +1,42 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.neo4j.connectors.kafka.source + +import kotlin.test.assertEquals +import kotlin.test.assertFailsWith +import org.apache.kafka.common.config.ConfigException +import org.junit.jupiter.api.Test +import org.neo4j.connectors.kafka.configuration.DeprecatedNeo4jConfiguration + +class DeprecatedNeo4jSourceConfigurationTest { + @Test + fun `should not allow cdc as source type`() { + assertFailsWith(ConfigException::class) { + DeprecatedNeo4jSourceConfiguration( + mapOf( + DeprecatedNeo4jConfiguration.SERVER_URI to "bolt://localhost", + DeprecatedNeo4jConfiguration.AUTHENTICATION_TYPE to "NONE", + DeprecatedNeo4jSourceConfiguration.TOPIC to "topic", + DeprecatedNeo4jSourceConfiguration.SOURCE_TYPE to "CDC")) + } + .also { + assertEquals( + "Invalid value CDC for configuration neo4j.source.type: Must be one of: 'QUERY'.", + it.message) + } + } +} diff --git a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt index 7c417ef42..ca8cb1463 100644 --- a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt +++ b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt @@ -16,8 +16,10 @@ */ package org.neo4j.connectors.kafka.source -import kotlin.test.assertContains -import kotlin.test.assertTrue +import io.kotest.matchers.collections.shouldContain +import io.kotest.matchers.collections.shouldExist +import io.kotest.matchers.collections.shouldMatchEach +import io.kotest.matchers.shouldBe import org.junit.jupiter.api.Test import org.neo4j.connectors.kafka.configuration.Neo4jConfiguration @@ -28,25 +30,18 @@ class Neo4jConnectorTest { val connector = Neo4jConnector() val config = connector.validate(mutableMapOf(Neo4jConfiguration.AUTHENTICATION_TYPE to "BASIC")) - assertContains( - config - .configValues() - .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BASIC_USERNAME } - .errorMessages(), - "Must be 
non-empty.") - assertContains( - config - .configValues() - .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BASIC_PASSWORD } - .errorMessages(), - "Must be non-empty.") - assertTrue { - config - .configValues() - .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BASIC_REALM } - .errorMessages() - .isEmpty() - } + config + .configValues() + .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BASIC_USERNAME } + .errorMessages() shouldContain "Must be non-empty." + config + .configValues() + .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BASIC_PASSWORD } + .errorMessages() shouldContain "Must be non-empty." + config + .configValues() + .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BASIC_REALM } + .errorMessages() shouldBe emptyList() } @Test @@ -55,12 +50,10 @@ class Neo4jConnectorTest { val config = connector.validate(mutableMapOf(Neo4jConfiguration.AUTHENTICATION_TYPE to "KERBEROS")) - assertContains( - config - .configValues() - .first { it.name() == Neo4jConfiguration.AUTHENTICATION_KERBEROS_TICKET } - .errorMessages(), - "Must be non-empty.") + config + .configValues() + .first { it.name() == Neo4jConfiguration.AUTHENTICATION_KERBEROS_TICKET } + .errorMessages() shouldContain "Must be non-empty." } @Test @@ -69,12 +62,10 @@ class Neo4jConnectorTest { val config = connector.validate(mutableMapOf(Neo4jConfiguration.AUTHENTICATION_TYPE to "BEARER")) - assertContains( - config - .configValues() - .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BEARER_TOKEN } - .errorMessages(), - "Must be non-empty.") + config + .configValues() + .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BEARER_TOKEN } + .errorMessages() shouldContain "Must be non-empty." 
} @Test @@ -83,30 +74,91 @@ class Neo4jConnectorTest { val config = connector.validate(mutableMapOf(Neo4jConfiguration.AUTHENTICATION_TYPE to "CUSTOM")) - assertContains( - config - .configValues() - .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_SCHEME } - .errorMessages(), - "Must be non-empty.") - assertContains( - config - .configValues() - .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_PRINCIPAL } - .errorMessages(), - "Must be non-empty.") - assertContains( - config - .configValues() - .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_CREDENTIALS } - .errorMessages(), - "Must be non-empty.") - assertTrue { - config - .configValues() - .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_REALM } - .errorMessages() - .isEmpty() - } + config + .configValues() + .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_SCHEME } + .errorMessages() shouldContain "Must be non-empty." + config + .configValues() + .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_PRINCIPAL } + .errorMessages() shouldContain "Must be non-empty." + config + .configValues() + .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_CREDENTIALS } + .errorMessages() shouldContain "Must be non-empty." + config + .configValues() + .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_REALM } + .errorMessages() shouldBe emptyList() + } + + @Test + fun `should validate empty topic configuration with cdc strategy`() { + val connector = Neo4jConnector() + val config = + connector.validate( + mutableMapOf( + Neo4jConfiguration.URI to "neo4j://localhost", + Neo4jConfiguration.AUTHENTICATION_TYPE to "NONE", + SourceConfiguration.STRATEGY to "CDC")) + + config + .configValues() + .first { it.name() == SourceConfiguration.STRATEGY } + .errorMessages() shouldContain + "Exactly one topic needs to be configured with pattern(s) describing the entities to query changes for. 
Please refer to documentation for more information." + } + + @Test + fun `should validate topic patterns with cdc strategy`() { + val connector = Neo4jConnector() + + connector + .validate( + mutableMapOf( + Neo4jConfiguration.URI to "neo4j://localhost", + Neo4jConfiguration.AUTHENTICATION_TYPE to "NONE", + SourceConfiguration.STRATEGY to "CDC", + "neo4j.cdc.topic.topic-1" to "")) + .apply { + this.configValues() + .first { it.name() == SourceConfiguration.STRATEGY } + .errorMessages() shouldContain + "Invalid value for configuration neo4j.cdc.topic.topic-1: Must not be blank." + } + + connector + .validate( + mutableMapOf( + Neo4jConfiguration.URI to "neo4j://localhost", + Neo4jConfiguration.AUTHENTICATION_TYPE to "NONE", + SourceConfiguration.STRATEGY to "CDC", + "neo4j.cdc.topic.topic-1" to "(;ABC]")) + .apply { + this.configValues() + .first { it.name() == SourceConfiguration.STRATEGY } + .errorMessages() shouldExist + { + it.startsWith("Invalid value (;ABC] for configuration neo4j.cdc.topic.topic-1:") + } + } + + connector + .validate( + mutableMapOf( + Neo4jConfiguration.URI to "neo4j://localhost", + Neo4jConfiguration.AUTHENTICATION_TYPE to "NONE", + SourceConfiguration.STRATEGY to "CDC", + "neo4j.cdc.topic.topic-1" to "(:Person),()-[:KNOWS]-()", + "neo4j.cdc.topic.topic-2.patterns" to "(:Person),()-[:KNOWS]-(:Company)")) + .apply { + this.configValues() + .first { it.name() == SourceConfiguration.STRATEGY } + .errorMessages() shouldMatchEach + listOf { + !it.startsWith( + "Exactly one topic needs to be configured with pattern(s) describing the entities to query changes for.") + } + } } } diff --git a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceTaskTest.kt b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceTaskTest.kt index c3ef7d718..7c80d7962 100644 --- a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceTaskTest.kt +++ 
b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceTaskTest.kt @@ -150,7 +150,7 @@ class Neo4jSourceTaskTest { val props = mutableMapOf() props[Neo4jConfiguration.URI] = neo4j.boltUrl props[SourceConfiguration.TOPIC] = UUID.randomUUID().toString() - props[SourceConfiguration.STREAM_FROM] = "ALL" + props[SourceConfiguration.START_FROM] = "EARLIEST" props[SourceConfiguration.QUERY_POLL_INTERVAL] = "10ms" props[SourceConfiguration.QUERY_STREAMING_PROPERTY] = "timestamp" props[SourceConfiguration.QUERY] = getSourceQuery() @@ -174,7 +174,7 @@ class Neo4jSourceTaskTest { val props = mutableMapOf() props[Neo4jConfiguration.URI] = neo4j.boltUrl props[SourceConfiguration.TOPIC] = UUID.randomUUID().toString() - props[SourceConfiguration.STREAM_FROM] = "ALL" + props[SourceConfiguration.START_FROM] = "EARLIEST" props[SourceConfiguration.QUERY_POLL_INTERVAL] = "10ms" props[SourceConfiguration.ENFORCE_SCHEMA] = "true" props[SourceConfiguration.QUERY_STREAMING_PROPERTY] = "timestamp" diff --git a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/SourceConfigurationTest.kt b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/SourceConfigurationTest.kt index 0bd8eaa4c..cdd7797cf 100644 --- a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/SourceConfigurationTest.kt +++ b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/SourceConfigurationTest.kt @@ -16,6 +16,7 @@ */ package org.neo4j.connectors.kafka.source +import io.kotest.matchers.throwable.shouldHaveMessage import kotlin.test.assertEquals import kotlin.test.assertFailsWith import kotlin.test.assertTrue @@ -41,21 +42,20 @@ class SourceConfigurationTest { SourceConfiguration.STRATEGY to "none")) } .also { - assertEquals( - "Invalid value none for configuration neo4j.source-strategy: Must be one of: 'QUERY'.", - it.message) + it shouldHaveMessage + "Invalid value none for configuration neo4j.source-strategy: Must be one of: 'QUERY', 'CDC'." 
} assertFailsWith(ConfigException::class) { SourceConfiguration( mapOf( Neo4jConfiguration.URI to "neo4j://localhost", + SourceConfiguration.TOPIC to "topic", SourceConfiguration.STRATEGY to "QUERY")) } .also { - assertEquals( - "Missing required configuration \"neo4j.query\" which has no default value.", - it.message) + it shouldHaveMessage + "Missing required configuration \"neo4j.query\" which has no default value." } assertFailsWith(ConfigException::class) { @@ -65,9 +65,8 @@ class SourceConfigurationTest { SourceConfiguration.STRATEGY to "none")) } .also { - assertEquals( - "Invalid value none for configuration neo4j.source-strategy: Must be one of: 'QUERY'.", - it.message) + it shouldHaveMessage + "Invalid value none for configuration neo4j.source-strategy: Must be one of: 'QUERY', 'CDC'." } assertFailsWith(ConfigException::class) { @@ -78,8 +77,8 @@ class SourceConfigurationTest { SourceConfiguration.QUERY to "MATCH (n) RETURN n")) } .also { - assertEquals( - "Missing required configuration \"topic\" which has no default value.", it.message) + it shouldHaveMessage + "Missing required configuration \"topic\" which has no default value." } assertFailsWith(ConfigException::class) { @@ -89,12 +88,11 @@ class SourceConfigurationTest { SourceConfiguration.STRATEGY to "QUERY", SourceConfiguration.QUERY to "MATCH (n) RETURN n", SourceConfiguration.TOPIC to "my-topic", - SourceConfiguration.STREAM_FROM to "none")) + SourceConfiguration.START_FROM to "none")) } .also { - assertEquals( - "Invalid value none for configuration neo4j.stream-from: Must be one of: 'ALL', 'NOW', 'LAST_COMMITTED'.", - it.message) + it shouldHaveMessage + "Invalid value none for configuration neo4j.start-from: Must be one of: 'EARLIEST', 'NOW', 'USER_PROVIDED'." 
} assertFailsWith(ConfigException::class) { @@ -104,13 +102,12 @@ class SourceConfigurationTest { SourceConfiguration.STRATEGY to "QUERY", SourceConfiguration.QUERY to "MATCH (n) RETURN n", SourceConfiguration.TOPIC to "my-topic", - SourceConfiguration.STREAM_FROM to "ALL", + SourceConfiguration.START_FROM to "EARLIEST", SourceConfiguration.QUERY_POLL_INTERVAL to "1k")) } .also { - assertEquals( - "Invalid value 1k for configuration neo4j.query.poll-interval: Must match pattern '(\\d+(ms|s|m|h|d))+'.", - it.message) + it shouldHaveMessage + "Invalid value 1k for configuration neo4j.query.poll-interval: Must match pattern '(\\d+(ms|s|m|h|d))+'." } assertFailsWith(ConfigException::class) { @@ -120,14 +117,13 @@ class SourceConfigurationTest { SourceConfiguration.STRATEGY to "QUERY", SourceConfiguration.QUERY to "MATCH (n) RETURN n", SourceConfiguration.TOPIC to "my-topic", - SourceConfiguration.STREAM_FROM to "ALL", + SourceConfiguration.START_FROM to "EARLIEST", SourceConfiguration.QUERY_POLL_INTERVAL to "1m", SourceConfiguration.QUERY_TIMEOUT to "1k")) } .also { - assertEquals( - "Invalid value 1k for configuration neo4j.query.timeout: Must match pattern '(\\d+(ms|s|m|h|d))+'.", - it.message) + it shouldHaveMessage + "Invalid value 1k for configuration neo4j.query.timeout: Must match pattern '(\\d+(ms|s|m|h|d))+'." 
} assertFailsWith(ConfigException::class) { @@ -137,15 +133,14 @@ class SourceConfigurationTest { SourceConfiguration.STRATEGY to "QUERY", SourceConfiguration.QUERY to "MATCH (n) RETURN n", SourceConfiguration.TOPIC to "my-topic", - SourceConfiguration.STREAM_FROM to "ALL", + SourceConfiguration.START_FROM to "EARLIEST", SourceConfiguration.QUERY_POLL_INTERVAL to "1m", SourceConfiguration.QUERY_TIMEOUT to "5m", SourceConfiguration.QUERY_BATCH_SIZE to "-1")) } .also { - assertEquals( - "Invalid value -1 for configuration neo4j.query.batch-size: Value must be at least 1", - it.message) + it shouldHaveMessage + "Invalid value -1 for configuration neo4j.query.batch-size: Value must be at least 1" } assertFailsWith(ConfigException::class) { @@ -155,16 +150,33 @@ class SourceConfigurationTest { SourceConfiguration.STRATEGY to "QUERY", SourceConfiguration.QUERY to "MATCH (n) RETURN n", SourceConfiguration.TOPIC to "my-topic", - SourceConfiguration.STREAM_FROM to "ALL", + SourceConfiguration.START_FROM to "EARLIEST", SourceConfiguration.QUERY_POLL_INTERVAL to "1m", SourceConfiguration.QUERY_TIMEOUT to "5m", SourceConfiguration.QUERY_BATCH_SIZE to "50", SourceConfiguration.ENFORCE_SCHEMA to "disabled")) } .also { - assertEquals( - "Invalid value disabled for configuration neo4j.enforce-schema: Expected value to be either true or false", - it.message) + it shouldHaveMessage + "Invalid value disabled for configuration neo4j.enforce-schema: Expected value to be either true or false" + } + + assertFailsWith(ConfigException::class) { + SourceConfiguration( + mapOf( + Neo4jConfiguration.URI to "neo4j://localhost", + SourceConfiguration.STRATEGY to "QUERY", + SourceConfiguration.QUERY to "MATCH (n) RETURN n", + SourceConfiguration.TOPIC to "my-topic", + SourceConfiguration.START_FROM to "USER_PROVIDED", + SourceConfiguration.QUERY_POLL_INTERVAL to "1m", + SourceConfiguration.QUERY_TIMEOUT to "5m", + SourceConfiguration.QUERY_BATCH_SIZE to "50", + SourceConfiguration.ENFORCE_SCHEMA 
to "disabled")) + } + .also { + it shouldHaveMessage + "Invalid value disabled for configuration neo4j.enforce-schema: Expected value to be either true or false" } } @@ -177,7 +189,7 @@ class SourceConfigurationTest { SourceConfiguration.STRATEGY to "QUERY", SourceConfiguration.QUERY to "MATCH (n) RETURN n", SourceConfiguration.TOPIC to "my-topic", - SourceConfiguration.STREAM_FROM to "ALL", + SourceConfiguration.START_FROM to "EARLIEST", SourceConfiguration.QUERY_POLL_INTERVAL to "1m", SourceConfiguration.QUERY_TIMEOUT to "5m", SourceConfiguration.QUERY_BATCH_SIZE to "50", @@ -187,7 +199,7 @@ class SourceConfigurationTest { assertEquals("MATCH (n) RETURN n", config.query) assertEquals("", config.queryStreamingProperty) assertEquals("my-topic", config.topic) - assertEquals(StreamingFrom.ALL, config.streamFrom) + assertEquals(StartFrom.EARLIEST, config.startFrom) assertEquals(1.minutes, config.queryPollingInterval) assertEquals(5.minutes, config.queryTimeout) assertEquals(50, config.queryBatchSize) @@ -204,7 +216,7 @@ class SourceConfigurationTest { SourceConfiguration.QUERY to "MATCH (n) RETURN n", SourceConfiguration.QUERY_STREAMING_PROPERTY to "timestamp", SourceConfiguration.TOPIC to "my-topic", - SourceConfiguration.STREAM_FROM to "ALL", + SourceConfiguration.START_FROM to "EARLIEST", SourceConfiguration.QUERY_POLL_INTERVAL to "1m", SourceConfiguration.QUERY_TIMEOUT to "5m", SourceConfiguration.QUERY_BATCH_SIZE to "50", @@ -214,7 +226,7 @@ class SourceConfigurationTest { assertEquals("MATCH (n) RETURN n", config.query) assertEquals("timestamp", config.queryStreamingProperty) assertEquals("my-topic", config.topic) - assertEquals(StreamingFrom.ALL, config.streamFrom) + assertEquals(StartFrom.EARLIEST, config.startFrom) assertEquals(1.minutes, config.queryPollingInterval) assertEquals(5.minutes, config.queryTimeout) assertEquals(50, config.queryBatchSize) diff --git a/source/src/test/kotlin/streams/kafka/connect/source/Neo4jSourceConnectorTest.kt 
b/source/src/test/kotlin/streams/kafka/connect/source/Neo4jSourceConnectorTest.kt new file mode 100644 index 000000000..68aab8be9 --- /dev/null +++ b/source/src/test/kotlin/streams/kafka/connect/source/Neo4jSourceConnectorTest.kt @@ -0,0 +1,42 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package streams.kafka.connect.source + +import kotlin.test.assertContains +import kotlin.test.assertEquals +import kotlin.test.assertNotNull +import kotlin.test.assertTrue +import org.junit.jupiter.api.Test +import org.neo4j.connectors.kafka.source.DeprecatedNeo4jSourceConfiguration + +class Neo4jSourceConnectorTest { + + @Test + fun `should not allow cdc as source type during validation`() { + val connector = Neo4jSourceConnector() + val config = connector.validate(mapOf(DeprecatedNeo4jSourceConfiguration.SOURCE_TYPE to "CDC")) + + val entry = + config.configValues().first { it.name() == DeprecatedNeo4jSourceConfiguration.SOURCE_TYPE } + assertNotNull(entry) + assertEquals(listOf("QUERY"), entry.recommendedValues()) + assertTrue(entry.errorMessages().isNotEmpty()) + assertContains( + entry.errorMessages(), + "Invalid value CDC for configuration neo4j.source.type: Must be one of: 'QUERY'.") + } +} From d24fc64095a79c23112a335a81968b917c19e1c2 Mon Sep 17 00:00:00 2001 From: Ali Ince Date: Tue, 17 Oct 2023 15:39:01 +0100 Subject: [PATCH 2/4] feat: add cdc task --- common/pom.xml | 5 + 
.../kafka/configuration/helpers/Validators.kt | 14 +- .../neo4j-source-configuration.properties | 6 +- .../configuration/helpers/ValidatorsTest.kt | 136 +++++- .../main/distributions/text/doc/LICENSES.txt | 2 + .../main/distributions/text/doc/NOTICE.txt | 2 + .../kafka/sink/Neo4jConnectorTest.kt | 14 +- source/LICENSES.txt | 2 + source/NOTICE.txt | 2 + source/pom.xml | 11 +- .../connectors/kafka/source/Neo4jCDCTask.kt | 161 +++++++ .../connectors/kafka/source/Neo4jConnector.kt | 17 +- ...jSourceService.kt => Neo4jQueryService.kt} | 6 +- .../{Neo4jSourceTask.kt => Neo4jQueryTask.kt} | 12 +- .../kafka/source/SourceConfiguration.kt | 130 +++++- .../connect/source/Neo4jSourceConnector.kt | 4 +- .../kafka/source/Neo4jCDCTaskTest.kt | 425 ++++++++++++++++++ .../kafka/source/Neo4jConnectorTest.kt | 97 +++- ...ourceTaskTest.kt => Neo4jQueryTaskTest.kt} | 4 +- .../kafka/source/SourceConfigurationTest.kt | 115 +++-- 20 files changed, 1067 insertions(+), 98 deletions(-) create mode 100644 source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTask.kt rename source/src/main/kotlin/org/neo4j/connectors/kafka/source/{Neo4jSourceService.kt => Neo4jQueryService.kt} (97%) rename source/src/main/kotlin/org/neo4j/connectors/kafka/source/{Neo4jSourceTask.kt => Neo4jQueryTask.kt} (77%) create mode 100644 source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTaskTest.kt rename source/src/test/kotlin/org/neo4j/connectors/kafka/source/{Neo4jSourceTaskTest.kt => Neo4jQueryTaskTest.kt} (99%) diff --git a/common/pom.xml b/common/pom.xml index 38d06366a..8d0834358 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -50,6 +50,11 @@ kafka-clients provided + + io.kotest + kotest-assertions-core-jvm + test + org.jetbrains.kotlin kotlin-test diff --git a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt index a170712da..e03f4ffb2 100644 --- 
a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt +++ b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt @@ -132,7 +132,7 @@ object Validators { when (value) { is String -> { if (value.isBlank()) { - throw ConfigException(name, value, "Must be non-empty.") + throw ConfigException(name, value, "Must not be blank.") } try { @@ -148,7 +148,7 @@ object Validators { } is List<*> -> { if (value.isEmpty()) { - throw ConfigException(name, value, "Must be non-empty.") + throw ConfigException(name, value, "Must not be blank.") } value.forEach { ensureValid(name, it) } @@ -166,7 +166,7 @@ object Validators { override fun ensureValid(name: String?, value: Any?) { if (value is String) { if (value.isBlank()) { - throw ConfigException(name, value, "Must be non-empty.") + throw ConfigException(name, value, "Must not be blank.") } val file = File(value) @@ -184,7 +184,7 @@ object Validators { } } else if (value is List<*>) { if (value.isEmpty()) { - throw ConfigException(name, value, "Must be non-empty.") + throw ConfigException(name, value, "Must not be blank.") } value.forEach { ensureValid(name, it) } @@ -201,8 +201,8 @@ object Validators { .let { config -> if (config.visible() && (when (val value = config.value()) { - is Int? -> value != null - is Boolean? -> value != null + is Int? -> value == null + is Boolean? -> value == null is String? -> value.isNullOrEmpty() is Password? -> value?.value().isNullOrEmpty() is List<*>? 
-> value.isEmpty() @@ -210,7 +210,7 @@ object Validators { throw IllegalArgumentException( "unexpected value '$value' for configuration $name") })) { - config.addErrorMessage("Must be non-empty.") + config.addErrorMessage("Invalid value for configuration $name: Must not be blank.") } } } diff --git a/common/src/main/resources/neo4j-source-configuration.properties b/common/src/main/resources/neo4j-source-configuration.properties index 817998cb3..3fc9c022d 100644 --- a/common/src/main/resources/neo4j-source-configuration.properties +++ b/common/src/main/resources/neo4j-source-configuration.properties @@ -21,6 +21,8 @@ neo4j.query.streaming-property=Type: String;\nDescription: Property name that is neo4j.query.poll-interval=Type: String;\nDescription: Interval in which the query is executed. topic=Type: String;\nDescription: Kafka topic to push gathered change messages. neo4j.enforce-schema=Type: Boolean;\nDescription: Whether to attach schema to produced change messages. -neo4j.query.batch-size=Type: Integer;\nDescription: Max number of messages pushed for each poll cycle. +neo4j.batch-size=Type: Integer;\nDescription: Max number of messages pushed for each poll cycle. neo4j.query.timeout=Type: Duration;\nDescription: Maximum amount of time source query is allowed to run. -neo4j.cdc.poll-interval=Type: Duration;\nDescription: The interval at which the database will be queried for change data. \ No newline at end of file +neo4j.cdc.poll-interval=Type: Duration;\nDescription: The interval at which the database will be queried for change data. +neo4j.cdc.poll-duration=Type: Duration;\nDescription: The maximum duration a poll request will wait for a change to be received from the database. +neo4j.ignore-stored-offset=Type: Boolean;\nDescription: Whether to ignore any offset value retrieved from the offset storage saved by a previous run. 
\ No newline at end of file diff --git a/common/src/test/kotlin/org/neo4j/connectors/kafka/configuration/helpers/ValidatorsTest.kt b/common/src/test/kotlin/org/neo4j/connectors/kafka/configuration/helpers/ValidatorsTest.kt index 99a15b45c..71ad41ef0 100644 --- a/common/src/test/kotlin/org/neo4j/connectors/kafka/configuration/helpers/ValidatorsTest.kt +++ b/common/src/test/kotlin/org/neo4j/connectors/kafka/configuration/helpers/ValidatorsTest.kt @@ -16,16 +16,22 @@ */ package org.neo4j.connectors.kafka.configuration.helpers +import io.kotest.matchers.collections.shouldContain +import io.kotest.matchers.collections.shouldHaveSize import java.io.File import kotlin.test.assertContains import kotlin.test.assertEquals import kotlin.test.assertFailsWith +import org.apache.kafka.common.config.Config import org.apache.kafka.common.config.ConfigDef import org.apache.kafka.common.config.ConfigException +import org.apache.kafka.common.config.ConfigValue +import org.apache.kafka.common.config.types.Password import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertDoesNotThrow import org.neo4j.connectors.kafka.configuration.AuthenticationType import org.neo4j.connectors.kafka.configuration.ConnectorType +import org.neo4j.connectors.kafka.configuration.helpers.Validators.validateNonEmptyIfVisible class ValidatorsTest { @@ -234,7 +240,7 @@ class ValidatorsTest { } .also { assertEquals( - "Invalid value $v for configuration my.property: Must be non-empty.", it.message) + "Invalid value $v for configuration my.property: Must not be blank.", it.message) } } @@ -282,7 +288,7 @@ class ValidatorsTest { assertFailsWith(ConfigException::class) { Validators.file().ensureValid("my.property", v) } .also { assertEquals( - "Invalid value $v for configuration my.property: Must be non-empty.", it.message) + "Invalid value $v for configuration my.property: Must not be blank.", it.message) } } @@ -320,4 +326,130 @@ class ValidatorsTest { Validators.file().ensureValid("my.property", 
listOf(f.absolutePath)) } } + + @Test + fun `validate non-empty if visible should behave correctly based on value types if blank`() { + Config( + listOf( + ConfigValue("BOOL_CONFIG", null as Boolean?, emptyList(), mutableListOf()).also { + it.visible(true) + })) + .apply { + validateNonEmptyIfVisible("BOOL_CONFIG") + + configValues()[0].errorMessages() shouldContain + "Invalid value for configuration BOOL_CONFIG: Must not be blank." + } + + Config( + listOf( + ConfigValue("INT_CONFIG", null as Int?, emptyList(), mutableListOf()).also { + it.visible(true) + })) + .apply { + validateNonEmptyIfVisible("INT_CONFIG") + + configValues()[0].errorMessages() shouldContain + "Invalid value for configuration INT_CONFIG: Must not be blank." + } + + listOf(null, "").forEach { s -> + Config( + listOf( + ConfigValue("STRING_CONFIG", s, emptyList(), mutableListOf()).also { + it.visible(true) + })) + .apply { + validateNonEmptyIfVisible("STRING_CONFIG") + + configValues()[0].errorMessages() shouldContain + "Invalid value for configuration STRING_CONFIG: Must not be blank." + } + } + + listOf(null as Password?, Password(null), Password("")).forEach { s -> + Config( + listOf( + ConfigValue("PASSWORD_CONFIG", s, emptyList(), mutableListOf()).also { + it.visible(true) + })) + .apply { + validateNonEmptyIfVisible("PASSWORD_CONFIG") + + configValues()[0].errorMessages() shouldContain + "Invalid value for configuration PASSWORD_CONFIG: Must not be blank." + } + } + + listOf(null as List<*>?, listOf(), mutableListOf()).forEach { s -> + Config( + listOf( + ConfigValue("LIST_CONFIG", s, emptyList(), mutableListOf()).also { + it.visible(true) + })) + .apply { + validateNonEmptyIfVisible("LIST_CONFIG") + + configValues()[0].errorMessages() shouldContain + "Invalid value for configuration LIST_CONFIG: Must not be blank." 
+ } + } + } + + @Test + fun `validate non-empty if visible should behave correctly based on value types if not blank`() { + Config( + listOf( + ConfigValue("BOOL_CONFIG", true, emptyList(), mutableListOf()).also { + it.visible(true) + })) + .apply { + validateNonEmptyIfVisible("BOOL_CONFIG") + + configValues()[0].errorMessages() shouldHaveSize 0 + } + + Config( + listOf( + ConfigValue("INT_CONFIG", 10_000, emptyList(), mutableListOf()).also { + it.visible(true) + })) + .apply { + validateNonEmptyIfVisible("INT_CONFIG") + + configValues()[0].errorMessages() shouldHaveSize 0 + } + + Config( + listOf( + ConfigValue("STRING_CONFIG", "value", emptyList(), mutableListOf()).also { + it.visible(true) + })) + .apply { + validateNonEmptyIfVisible("STRING_CONFIG") + + configValues()[0].errorMessages() shouldHaveSize 0 + } + + Config( + listOf( + ConfigValue("PASSWORD_CONFIG", Password("value"), emptyList(), mutableListOf()) + .also { it.visible(true) })) + .apply { + validateNonEmptyIfVisible("PASSWORD_CONFIG") + + configValues()[0].errorMessages() shouldHaveSize 0 + } + + Config( + listOf( + ConfigValue("LIST_CONFIG", listOf("value"), emptyList(), mutableListOf()).also { + it.visible(true) + })) + .apply { + validateNonEmptyIfVisible("LIST_CONFIG") + + configValues()[0].errorMessages() shouldHaveSize 0 + } + } } diff --git a/packaging/src/main/distributions/text/doc/LICENSES.txt b/packaging/src/main/distributions/text/doc/LICENSES.txt index ebef93530..610653f38 100644 --- a/packaging/src/main/distributions/text/doc/LICENSES.txt +++ b/packaging/src/main/distributions/text/doc/LICENSES.txt @@ -18,6 +18,8 @@ Apache Software License, Version 2.0 Kotlin Stdlib Jdk7 Kotlin Stdlib Jdk8 kotlinx-coroutines-core + kotlinx-coroutines-reactive + kotlinx-coroutines-reactor Neo4j Java Driver Non-Blocking Reactive Foundation for the JVM ------------------------------------------------------------------------------ diff --git a/packaging/src/main/distributions/text/doc/NOTICE.txt 
b/packaging/src/main/distributions/text/doc/NOTICE.txt index 9e5eba3de..7ad986fd5 100644 --- a/packaging/src/main/distributions/text/doc/NOTICE.txt +++ b/packaging/src/main/distributions/text/doc/NOTICE.txt @@ -33,6 +33,8 @@ Apache Software License, Version 2.0 Kotlin Stdlib Jdk7 Kotlin Stdlib Jdk8 kotlinx-coroutines-core + kotlinx-coroutines-reactive + kotlinx-coroutines-reactor Neo4j Java Driver Non-Blocking Reactive Foundation for the JVM diff --git a/sink/src/test/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnectorTest.kt b/sink/src/test/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnectorTest.kt index b89a59302..bc59a443d 100644 --- a/sink/src/test/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnectorTest.kt +++ b/sink/src/test/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnectorTest.kt @@ -33,13 +33,13 @@ class Neo4jConnectorTest { .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BASIC_USERNAME } .errorMessages(), - "Must be non-empty.") + "Invalid value for configuration neo4j.authentication.basic.username: Must not be blank.") assertContains( config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BASIC_PASSWORD } .errorMessages(), - "Must be non-empty.") + "Invalid value for configuration neo4j.authentication.basic.password: Must not be blank.") assertTrue { config .configValues() @@ -60,7 +60,7 @@ class Neo4jConnectorTest { .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_KERBEROS_TICKET } .errorMessages(), - "Must be non-empty.") + "Invalid value for configuration neo4j.authentication.kerberos.ticket: Must not be blank.") } @Test @@ -74,7 +74,7 @@ class Neo4jConnectorTest { .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BEARER_TOKEN } .errorMessages(), - "Must be non-empty.") + "Invalid value for configuration neo4j.authentication.bearer.token: Must not be blank.") } @Test @@ -88,19 +88,19 @@ class Neo4jConnectorTest { .configValues() .first { it.name() == 
Neo4jConfiguration.AUTHENTICATION_CUSTOM_SCHEME } .errorMessages(), - "Must be non-empty.") + "Invalid value for configuration neo4j.authentication.custom.scheme: Must not be blank.") assertContains( config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_PRINCIPAL } .errorMessages(), - "Must be non-empty.") + "Invalid value for configuration neo4j.authentication.custom.principal: Must not be blank.") assertContains( config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_CREDENTIALS } .errorMessages(), - "Must be non-empty.") + "Invalid value for configuration neo4j.authentication.custom.credentials: Must not be blank.") assertTrue { config .configValues() diff --git a/source/LICENSES.txt b/source/LICENSES.txt index ebef93530..610653f38 100644 --- a/source/LICENSES.txt +++ b/source/LICENSES.txt @@ -18,6 +18,8 @@ Apache Software License, Version 2.0 Kotlin Stdlib Jdk7 Kotlin Stdlib Jdk8 kotlinx-coroutines-core + kotlinx-coroutines-reactive + kotlinx-coroutines-reactor Neo4j Java Driver Non-Blocking Reactive Foundation for the JVM ------------------------------------------------------------------------------ diff --git a/source/NOTICE.txt b/source/NOTICE.txt index 9e5eba3de..7ad986fd5 100644 --- a/source/NOTICE.txt +++ b/source/NOTICE.txt @@ -33,6 +33,8 @@ Apache Software License, Version 2.0 Kotlin Stdlib Jdk7 Kotlin Stdlib Jdk8 kotlinx-coroutines-core + kotlinx-coroutines-reactive + kotlinx-coroutines-reactor Neo4j Java Driver Non-Blocking Reactive Foundation for the JVM diff --git a/source/pom.xml b/source/pom.xml index 11d38933c..7a3b8101d 100644 --- a/source/pom.xml +++ b/source/pom.xml @@ -12,9 +12,13 @@ source Neo4j Connector for Kafka - Source - 1.0.3 + 1.0.4-SNAPSHOT + + org.jetbrains.kotlinx + kotlinx-coroutines-reactor + org.neo4j.connectors cdc @@ -65,6 +69,11 @@ mockito-core test + + org.mockito.kotlin + mockito-kotlin + test + org.neo4j.connectors.kafka testing diff --git 
a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTask.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTask.kt new file mode 100644 index 000000000..992680163 --- /dev/null +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTask.kt @@ -0,0 +1,161 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.neo4j.connectors.kafka.source + +import java.util.concurrent.atomic.AtomicReference +import kotlin.time.TimeSource +import kotlin.time.toJavaDuration +import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.asFlow +import kotlinx.coroutines.flow.flatMapConcat +import kotlinx.coroutines.flow.toList +import kotlinx.coroutines.reactive.asFlow +import kotlinx.coroutines.runBlocking +import org.apache.kafka.connect.data.Schema +import org.apache.kafka.connect.source.SourceRecord +import org.apache.kafka.connect.source.SourceTask +import org.neo4j.cdc.client.CDCClient +import org.neo4j.cdc.client.CDCService +import org.neo4j.cdc.client.model.ChangeEvent +import org.neo4j.cdc.client.model.ChangeIdentifier +import org.neo4j.cdc.client.model.NodeEvent +import org.neo4j.cdc.client.model.RelationshipEvent +import org.neo4j.connectors.kafka.configuration.helpers.VersionUtil +import org.neo4j.driver.SessionConfig +import org.slf4j.Logger 
+import org.slf4j.LoggerFactory + +class Neo4jCDCTask : SourceTask() { + private val log: Logger = LoggerFactory.getLogger(Neo4jCDCTask::class.java) + + private lateinit var settings: Map + private lateinit var config: SourceConfiguration + private lateinit var sessionConfig: SessionConfig + private lateinit var cdc: CDCService + private lateinit var offset: AtomicReference + + override fun version(): String = VersionUtil.version(this.javaClass as Class<*>) + + override fun start(props: MutableMap?) { + log.info("starting") + + settings = props!! + config = SourceConfiguration(settings) + val configBuilder = SessionConfig.builder() + if (config.database.isNotBlank()) { + configBuilder.withDatabase(config.database) + } + sessionConfig = configBuilder.build() + + cdc = + CDCClient( + config.driver, + { sessionConfig }, + config.cdcPollingInterval.toJavaDuration(), + *config.cdcSelectors.toTypedArray()) + log.debug("constructed cdc client") + + offset = AtomicReference(resumeFrom(config, cdc)) + log.info("resuming from offset: ${offset.get()}") + } + + override fun stop() { + log.info("stopping") + config.close() + } + + @OptIn(ExperimentalCoroutinesApi::class) + override fun poll(): MutableList { + log.debug("polling") + val list = mutableListOf() + + runBlocking { + val timeSource = TimeSource.Monotonic + val start = timeSource.markNow() + val limit = start + config.cdcPollingDuration + + while (limit.hasNotPassedNow()) { + cdc.query(ChangeIdentifier(offset.get())) + .take(config.batchSize.toLong(), true) + .asFlow() + .flatMapConcat { build(it) } + .toList(list) + if (list.isNotEmpty()) { + break + } + + delay(config.cdcPollingInterval) + } + + if (list.isNotEmpty()) { + offset.set(list.last().sourceOffset()["value"] as String) + } + } + + log.debug("poll resulted in {} messages", list.size) + return list + } + + private fun build(changeEvent: ChangeEvent): Flow { + val result = mutableListOf() + + config.cdcSelectorsToTopics.forEach { + if 
(it.key.matches(changeEvent)) { + result.addAll( + it.value.map { topic -> + SourceRecord( + config.partition, + mapOf("value" to changeEvent.id.id), + topic, + Schema.STRING_SCHEMA, + when (val event = changeEvent.event) { + is NodeEvent -> event.elementId + is RelationshipEvent -> event.elementId + else -> throw IllegalArgumentException("unknown event type: ${event.eventType}") + }, + Schema.STRING_SCHEMA, + it.key.applyProperties(changeEvent).toString()) + }) + } + } + + return result.asFlow() + } + + private fun resumeFrom(config: SourceConfiguration, cdc: CDCService): String { + val offset = context.offsetStorageReader().offset(config.partition) ?: emptyMap() + if (!config.ignoreStoredOffset && offset["value"] != null && offset["value"] is String) { + log.debug("previously stored offset is {}", offset["value"]) + return offset["value"] as String + } + + val value = + when (config.startFrom) { + StartFrom.EARLIEST -> cdc.earliest().block()?.id!! + StartFrom.NOW -> cdc.current().block()?.id!! 
+ StartFrom.USER_PROVIDED -> config.startFromCustom + } + log.debug( + "{} is set as {}, offset to resume from is {}", + SourceConfiguration.START_FROM, + config.startFrom, + value) + return value + } +} diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt index 544dc1f77..630a3bc09 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt @@ -20,18 +20,27 @@ import org.apache.kafka.common.config.Config import org.apache.kafka.common.config.ConfigDef import org.apache.kafka.connect.connector.Task import org.apache.kafka.connect.source.SourceConnector -import org.neo4j.connectors.kafka.utils.PropertiesUtil +import org.neo4j.connectors.kafka.configuration.helpers.VersionUtil class Neo4jConnector : SourceConnector() { private lateinit var props: Map + private lateinit var config: SourceConfiguration - override fun version(): String = PropertiesUtil.getVersion() + override fun version(): String = VersionUtil.version(Neo4jConnector::class.java) override fun start(props: MutableMap?) 
{ - this.props = props!!.toMap() + val originalProps = props!!.toMap() + val config = SourceConfiguration(originalProps) + config.validate() + this.props = originalProps + this.config = config } - override fun taskClass(): Class = Neo4jSourceTask::class.java + override fun taskClass(): Class = + when (config.strategy) { + SourceType.CDC -> Neo4jCDCTask::class.java + SourceType.QUERY -> Neo4jQueryTask::class.java + } override fun taskConfigs(maxTasks: Int): List> = listOf(props) diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceService.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryService.kt similarity index 97% rename from source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceService.kt rename to source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryService.kt index a416cc13d..556e5bd97 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceService.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryService.kt @@ -40,12 +40,12 @@ import org.neo4j.driver.Values import org.slf4j.Logger import org.slf4j.LoggerFactory -class Neo4jSourceService( +class Neo4jQueryService( private val config: SourceConfiguration, offsetStorageReader: OffsetStorageReader ) : AutoCloseable { - private val log: Logger = LoggerFactory.getLogger(Neo4jSourceService::class.java) + private val log: Logger = LoggerFactory.getLogger(Neo4jQueryService::class.java) private val queue: BlockingQueue = LinkedBlockingQueue() private val error: AtomicReference = AtomicReference(null) @@ -184,7 +184,7 @@ class Neo4jSourceService( val events = mutableListOf() return try { events.add(firstEvent) - queue.drainTo(events, config.queryBatchSize - 1) + queue.drainTo(events, config.batchSize - 1) log.info("Poll returns {} result(s)", events.size) events } catch (e: Exception) { diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceTask.kt 
b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTask.kt similarity index 77% rename from source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceTask.kt rename to source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTask.kt index 8a7ba00fe..612864492 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceTask.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTask.kt @@ -24,27 +24,27 @@ import org.neo4j.connectors.kafka.utils.StreamsUtils import org.slf4j.Logger import org.slf4j.LoggerFactory -class Neo4jSourceTask : SourceTask() { +class Neo4jQueryTask : SourceTask() { private lateinit var props: Map private lateinit var config: SourceConfiguration - private lateinit var neo4jSourceService: Neo4jSourceService + private lateinit var neo4JQueryService: Neo4jQueryService - private val log: Logger = LoggerFactory.getLogger(Neo4jSourceTask::class.java) + private val log: Logger = LoggerFactory.getLogger(Neo4jQueryTask::class.java) override fun version(): String = VersionUtil.version(this.javaClass) override fun start(props: MutableMap?) { this.props = props!! config = SourceConfiguration(this.props) - neo4jSourceService = Neo4jSourceService(config, context.offsetStorageReader()) + neo4JQueryService = Neo4jQueryService(config, context.offsetStorageReader()) } @DelicateCoroutinesApi override fun stop() { log.info("Stop() - Closing Neo4j Source Service.") StreamsUtils.ignoreExceptions( - { neo4jSourceService.close() }, UninitializedPropertyAccessException::class.java) + { neo4JQueryService.close() }, UninitializedPropertyAccessException::class.java) } - override fun poll(): List? = neo4jSourceService.poll() + override fun poll(): List? 
= neo4JQueryService.poll() } diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt index 381e857cd..281ff71ff 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt @@ -20,11 +20,16 @@ import java.util.function.Predicate import kotlin.time.Duration import kotlin.time.Duration.Companion.seconds import kotlin.time.toJavaDuration +import org.apache.kafka.common.config.Config import org.apache.kafka.common.config.ConfigDef import org.apache.kafka.common.config.ConfigDef.Range import org.apache.kafka.common.config.ConfigException import org.neo4j.cdc.client.pattern.Pattern import org.neo4j.cdc.client.pattern.PatternException +import org.neo4j.cdc.client.selector.EntitySelector +import org.neo4j.cdc.client.selector.NodeSelector +import org.neo4j.cdc.client.selector.RelationshipSelector +import org.neo4j.cdc.client.selector.Selector import org.neo4j.connectors.kafka.configuration.ConnectorType import org.neo4j.connectors.kafka.configuration.DeprecatedNeo4jConfiguration import org.neo4j.connectors.kafka.configuration.Neo4jConfiguration @@ -60,6 +65,9 @@ class SourceConfiguration(originals: Map<*, *>) : val startFromCustom get(): String = getString(START_FROM_VALUE) + val ignoreStoredOffset + get(): Boolean = getBoolean(IGNORE_STORED_OFFSET) + val enforceSchema get(): Boolean = getBoolean(ENFORCE_SCHEMA) @@ -75,8 +83,8 @@ class SourceConfiguration(originals: Map<*, *>) : val queryPollingInterval get(): Duration = Duration.parseSimpleString(getString(QUERY_POLL_INTERVAL)) - val queryBatchSize - get(): Int = getInt(QUERY_BATCH_SIZE) + val batchSize + get(): Int = getInt(BATCH_SIZE) val queryTimeout get(): Duration = Duration.parseSimpleString(getString(QUERY_TIMEOUT)) @@ -94,6 +102,61 @@ class SourceConfiguration(originals: Map<*, *>) 
: } } + val cdcPollingInterval + get(): Duration = Duration.parseSimpleString(getString(CDC_POLL_INTERVAL)) + + val cdcPollingDuration + get(): Duration = Duration.parseSimpleString(getString(CDC_POLL_DURATION)) + + val cdcSelectorsToTopics: Map> by lazy { + when (strategy) { + SourceType.CDC -> { + val map = mutableMapOf>() + + originals() + .entries + .filter { CDC_PATTERNS_REGEX.matches(it.key) } + .flatMap { + Pattern.parse(it.value as String?) + .flatMap { it.toSelector() } + .map { key -> key to CDC_PATTERNS_REGEX.matchEntire(it.key)!!.groupValues[1] } + } + .forEach { + if (!map.containsKey(it.first)) { + map[it.first] = mutableListOf() + } + + val list = map[it.first]!! + list.add(it.second) + list.sort() + } + + map + } + else -> emptyMap() + } + } + + val cdcSelectors: Set by lazy { + cdcSelectorsToTopics.keys + .map { + when (it) { + is NodeSelector -> + NodeSelector( + it.change, + it.changesTo, + it.labels, + it.key, + ) + is RelationshipSelector -> + RelationshipSelector(it.change, it.changesTo, it.type, it.start, it.end, it.key) + is EntitySelector -> EntitySelector(it.change, it.changesTo) + else -> throw IllegalStateException("unexpected pattern type ${it.javaClass.name}") + } + } + .toSet() + } + override fun txConfig(): TransactionConfig { val original = super.txConfig() val new = TransactionConfig.builder() @@ -107,25 +170,46 @@ class SourceConfiguration(originals: Map<*, *>) : return new.build() } + fun validate() { + val def = config() + val originals = originalsStrings() + val values = def.validate(originals) + val config = Config(values) + + validate(config, originals) + + val errors = + config + .configValues() + .filter { v -> v.errorMessages().isNotEmpty() } + .flatMap { v -> v.errorMessages() } + if (errors.isNotEmpty()) { + throw ConfigException(errors.joinToString()) + } + } + companion object { const val START_FROM = "neo4j.start-from" const val START_FROM_VALUE = "neo4j.start-from.value" + const val IGNORE_STORED_OFFSET = 
"neo4j.ignore-stored-offset" const val STRATEGY = "neo4j.source-strategy" + const val BATCH_SIZE = "neo4j.batch-size" const val QUERY = "neo4j.query" const val QUERY_STREAMING_PROPERTY = "neo4j.query.streaming-property" const val QUERY_POLL_INTERVAL = "neo4j.query.poll-interval" - const val QUERY_BATCH_SIZE = "neo4j.query.batch-size" const val QUERY_TIMEOUT = "neo4j.query.timeout" const val TOPIC = "topic" const val ENFORCE_SCHEMA = "neo4j.enforce-schema" const val CDC_POLL_INTERVAL = "neo4j.cdc.poll-interval" + const val CDC_POLL_DURATION = "neo4j.cdc.poll-duration" private val CDC_PATTERNS_REGEX = Regex("^neo4j\\.cdc\\.topic\\.([a-zA-Z0-9._-]+)(\\.patterns)?$") private val DEFAULT_POLL_INTERVAL = 10.seconds - private const val DEFAULT_QUERY_BATCH_SIZE = 1000 + private const val DEFAULT_BATCH_SIZE = 1000 private val DEFAULT_QUERY_TIMEOUT = 0.seconds - private val DEFAULT_CDC_POLL_INTERVAL = 10.seconds + private val DEFAULT_CDC_POLL_INTERVAL = 1.seconds + private val DEFAULT_CDC_POLL_DURATION = 5.seconds fun migrateSettings(oldSettings: Map): Map { val migrated = Neo4jConfiguration.migrateSettings(oldSettings, true).toMutableMap() @@ -151,8 +235,7 @@ class SourceConfiguration(originals: Map<*, *>) : DeprecatedNeo4jSourceConfiguration.ENFORCE_SCHEMA -> migrated[ENFORCE_SCHEMA] = it.value.toString() DeprecatedNeo4jSourceConfiguration.TOPIC -> migrated[TOPIC] = it.value.toString() - DeprecatedNeo4jConfiguration.BATCH_SIZE -> - migrated[QUERY_BATCH_SIZE] = it.value.toString() + DeprecatedNeo4jConfiguration.BATCH_SIZE -> migrated[BATCH_SIZE] = it.value.toString() DeprecatedNeo4jConfiguration.BATCH_TIMEOUT_MSECS -> migrated[QUERY_TIMEOUT] = "${it.value}ms" else -> @@ -174,12 +257,14 @@ class SourceConfiguration(originals: Map<*, *>) : // START_FROM user defined validation config.validateNonEmptyIfVisible(START_FROM_VALUE) + // COMMON fields + config.validateNonEmptyIfVisible(BATCH_SIZE) + // QUERY strategy validation config.validateNonEmptyIfVisible(TOPIC) 
config.validateNonEmptyIfVisible(QUERY) config.validateNonEmptyIfVisible(QUERY_TIMEOUT) config.validateNonEmptyIfVisible(QUERY_POLL_INTERVAL) - config.validateNonEmptyIfVisible(QUERY_BATCH_SIZE) // CDC validation config.validateNonEmptyIfVisible(CDC_POLL_INTERVAL) @@ -188,9 +273,9 @@ class SourceConfiguration(originals: Map<*, *>) : val strategy = configList.find { it.name() == STRATEGY } if (strategy?.value() == SourceType.CDC.name) { val cdcTopics = originals.entries.filter { CDC_PATTERNS_REGEX.matches(it.key) } - if (cdcTopics.isEmpty() || cdcTopics.size > 1) { + if (cdcTopics.isEmpty()) { strategy.addErrorMessage( - "Exactly one topic needs to be configured with pattern(s) describing the entities to query changes for. Please refer to documentation for more information.") + "At least one topic needs to be configured with pattern(s) describing the entities to query changes for. Please refer to documentation for more information.") } else { cdcTopics.forEach { // parse & validate CDC patterns @@ -238,11 +323,17 @@ class SourceConfiguration(originals: Map<*, *>) : Recommenders.visibleIf( START_FROM, Predicate.isEqual(StartFrom.USER_PROVIDED.name)) }) + .define( + ConfigKeyBuilder.of(IGNORE_STORED_OFFSET, ConfigDef.Type.BOOLEAN) { + documentation = PropertiesUtil.getProperty(IGNORE_STORED_OFFSET) + importance = ConfigDef.Importance.HIGH + defaultValue = false + }) .define( ConfigKeyBuilder.of(TOPIC, ConfigDef.Type.STRING) { documentation = PropertiesUtil.getProperty(TOPIC) importance = ConfigDef.Importance.HIGH - validator = ConfigDef.NonEmptyString() + defaultValue = "" dependents = listOf(STRATEGY) recommender = Recommenders.visibleIf(STRATEGY, Predicate.isEqual(SourceType.QUERY.name)) @@ -251,6 +342,7 @@ class SourceConfiguration(originals: Map<*, *>) : ConfigKeyBuilder.of(QUERY, ConfigDef.Type.STRING) { documentation = PropertiesUtil.getProperty(QUERY) importance = ConfigDef.Importance.HIGH + defaultValue = "" dependents = listOf(STRATEGY) recommender = 
Recommenders.visibleIf(STRATEGY, Predicate.isEqual(SourceType.QUERY.name)) @@ -275,14 +367,14 @@ class SourceConfiguration(originals: Map<*, *>) : defaultValue = DEFAULT_POLL_INTERVAL.toSimpleString() }) .define( - ConfigKeyBuilder.of(QUERY_BATCH_SIZE, ConfigDef.Type.INT) { - documentation = PropertiesUtil.getProperty(QUERY_BATCH_SIZE) + ConfigKeyBuilder.of(BATCH_SIZE, ConfigDef.Type.INT) { + documentation = PropertiesUtil.getProperty(BATCH_SIZE) importance = ConfigDef.Importance.HIGH dependents = listOf(STRATEGY) recommender = Recommenders.visibleIf(STRATEGY, Predicate.isEqual(SourceType.QUERY.name)) validator = Range.atLeast(1) - defaultValue = DEFAULT_QUERY_BATCH_SIZE + defaultValue = DEFAULT_BATCH_SIZE }) .define( ConfigKeyBuilder.of(QUERY_TIMEOUT, ConfigDef.Type.STRING) { @@ -304,6 +396,16 @@ class SourceConfiguration(originals: Map<*, *>) : validator = Validators.pattern(SIMPLE_DURATION_PATTERN) defaultValue = DEFAULT_CDC_POLL_INTERVAL.toSimpleString() }) + .define( + ConfigKeyBuilder.of(CDC_POLL_DURATION, ConfigDef.Type.STRING) { + documentation = PropertiesUtil.getProperty(CDC_POLL_DURATION) + importance = ConfigDef.Importance.HIGH + dependents = listOf(STRATEGY) + recommender = + Recommenders.visibleIf(STRATEGY, Predicate.isEqual(SourceType.CDC.name)) + validator = Validators.pattern(SIMPLE_DURATION_PATTERN) + defaultValue = DEFAULT_CDC_POLL_DURATION.toSimpleString() + }) .define( ConfigKeyBuilder.of(ENFORCE_SCHEMA, ConfigDef.Type.BOOLEAN) { documentation = PropertiesUtil.getProperty(ENFORCE_SCHEMA) diff --git a/source/src/main/kotlin/streams/kafka/connect/source/Neo4jSourceConnector.kt b/source/src/main/kotlin/streams/kafka/connect/source/Neo4jSourceConnector.kt index 78c3f5c22..efe0e6103 100644 --- a/source/src/main/kotlin/streams/kafka/connect/source/Neo4jSourceConnector.kt +++ b/source/src/main/kotlin/streams/kafka/connect/source/Neo4jSourceConnector.kt @@ -20,7 +20,7 @@ import org.apache.kafka.common.config.ConfigDef import 
org.apache.kafka.connect.connector.Task import org.apache.kafka.connect.source.SourceConnector import org.neo4j.connectors.kafka.source.DeprecatedNeo4jSourceConfiguration -import org.neo4j.connectors.kafka.source.Neo4jSourceTask +import org.neo4j.connectors.kafka.source.Neo4jQueryTask import org.neo4j.connectors.kafka.source.SourceConfiguration import org.neo4j.connectors.kafka.utils.PropertiesUtil @@ -45,7 +45,7 @@ class Neo4jSourceConnector : SourceConnector() { override fun version(): String = PropertiesUtil.getVersion() - override fun taskClass(): Class = Neo4jSourceTask::class.java + override fun taskClass(): Class = Neo4jQueryTask::class.java override fun config(): ConfigDef = DeprecatedNeo4jSourceConfiguration.config() } diff --git a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTaskTest.kt b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTaskTest.kt new file mode 100644 index 000000000..baa04f561 --- /dev/null +++ b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTaskTest.kt @@ -0,0 +1,425 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.neo4j.connectors.kafka.source + +import io.kotest.matchers.collections.shouldHaveSize +import io.kotest.matchers.comparables.shouldBeGreaterThan +import io.kotest.matchers.comparables.shouldBeLessThan +import kotlin.time.Duration.Companion.seconds +import kotlin.time.measureTime +import org.apache.kafka.connect.source.SourceTask +import org.apache.kafka.connect.source.SourceTaskContext +import org.apache.kafka.connect.storage.OffsetStorageReader +import org.junit.jupiter.api.AfterAll +import org.junit.jupiter.api.AfterEach +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.Test +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.EnumSource +import org.mockito.ArgumentMatchers +import org.mockito.kotlin.doReturn +import org.mockito.kotlin.mock +import org.neo4j.connectors.kafka.configuration.AuthenticationType +import org.neo4j.connectors.kafka.configuration.Neo4jConfiguration +import org.neo4j.driver.AuthTokens +import org.neo4j.driver.Driver +import org.neo4j.driver.GraphDatabase +import org.neo4j.driver.Session +import org.testcontainers.containers.Neo4jContainer +import org.testcontainers.junit.jupiter.Container +import org.testcontainers.junit.jupiter.Testcontainers + +@Testcontainers +class Neo4jCDCTaskTest { + companion object { + @Container + val neo4j: Neo4jContainer<*> = + Neo4jContainer("neo4j:5-enterprise") + .withEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes") + .withNeo4jConfig("internal.dbms.change_data_capture", "true") + .withoutAuthentication() + + private lateinit var driver: Driver + private lateinit var session: Session + + @BeforeAll + @JvmStatic + fun setUpContainer() { + driver = GraphDatabase.driver(neo4j.boltUrl, AuthTokens.none()) + session = driver.session() + } + + @AfterAll + @JvmStatic + fun tearDownContainer() { + session.close() + driver.close() + } + + fun currentChangeId(): String { + return session.run("CALL 
cdc.current").single().get(0).asString() + } + + fun earliestChangeId(): String { + return session.run("CALL cdc.earliest").single().get(0).asString() + } + } + + private lateinit var task: SourceTask + + @AfterEach + fun after() { + task.stop() + } + + @BeforeEach + fun before() { + session + .run( + "CREATE OR REPLACE DATABASE \$db OPTIONS { txLogEnrichment: \$mode } WAIT", + mapOf("db" to "neo4j", "mode" to "FULL")) + .consume() + + task = Neo4jCDCTask() + task.initialize(newTaskContextWithOffset()) + } + + @Test + fun `should use correct offset when startFrom=earliest`() { + // create data, 200 nodes + 100 relationships + session.run("UNWIND RANGE(1, 100) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // start task with EARLIEST, previous changes should be visible + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.CDC.toString(), + SourceConfiguration.START_FROM to StartFrom.EARLIEST.toString(), + "neo4j.cdc.topic.nodes" to "()", + "neo4j.cdc.topic.relationships" to "()-[]-()")) + + // poll for changes + val changes = task.poll() + + // expect to see previously created data + changes shouldHaveSize 100 * 2 + 100 + } + + @Test + fun `should use correct offset when startFrom=now`() { + // create data (1), 200 nodes + 100 relationships + session.run("UNWIND RANGE(1, 100) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // start task with NOW, previous changes should NOT be visible + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.CDC.toString(), + SourceConfiguration.START_FROM to StartFrom.NOW.toString(), + "neo4j.cdc.topic.nodes" to "()", + "neo4j.cdc.topic.relationships" to "()-[]-()")) + + // create data (2) + session.run("UNWIND RANGE(1, 75) AS x CREATE (n), 
(m), (n)-[:RELATED_TO]->(m)").consume() + + // poll for changes + val changes = task.poll() + + // expected to see created data (2) + changes shouldHaveSize 75 * 2 + 75 + } + + @Test + fun `should use correct offset when startFrom=user provided`() { + // create data (1), 200 nodes + 100 relationships + session.run("UNWIND RANGE(1, 100) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // capture change identifier + val changeId = currentChangeId() + + // create data (2), 150 nodes + 75 relationships + session.run("UNWIND RANGE(1, 75) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // start task with USER_PROVIDED, with value set as captured change identifier + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.CDC.toString(), + SourceConfiguration.START_FROM to StartFrom.USER_PROVIDED.toString(), + SourceConfiguration.START_FROM_VALUE to changeId, + "neo4j.cdc.topic.nodes" to "()", + "neo4j.cdc.topic.relationships" to "()-[]-()")) + + // poll for changes + val changes = task.poll() + + // expected to see created data (2) + changes shouldHaveSize 75 * 2 + 75 + } + + @ParameterizedTest + @EnumSource(StartFrom::class) + fun `should use stored offset regardless of provided startFrom`(startFrom: StartFrom) { + // create data (1), 200 nodes + 100 relationships + session.run("UNWIND RANGE(1, 100) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // capture change identifier and set it as stored offset + task.initialize(newTaskContextWithCurrentChangeId()) + + // create data (2), 150 nodes + 75 relationships + session.run("UNWIND RANGE(1, 75) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // start task with provided START_FROM, with the mocked task context + task.start( + buildMap { + put(Neo4jConfiguration.URI, neo4j.boltUrl) + put(Neo4jConfiguration.AUTHENTICATION_TYPE, 
AuthenticationType.NONE.toString()) + put(SourceConfiguration.STRATEGY, SourceType.CDC.toString()) + put(SourceConfiguration.START_FROM, startFrom.toString()) + if (startFrom == StartFrom.USER_PROVIDED) { + put(SourceConfiguration.START_FROM_VALUE, earliestChangeId()) + } + put("neo4j.cdc.topic.nodes", "()") + put("neo4j.cdc.topic.relationships", "()-[]-()") + }) + + // poll for changes + val changes = task.poll() + + // expected to see created data (2) because of the stored offset value + changes shouldHaveSize 75 * 2 + 75 + } + + @Test + fun `should ignore stored offset when startFrom=earliest`() { + // create data (1), 100 nodes + 50 relationships + session.run("UNWIND RANGE(1, 50) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // capture change identifier and set it as stored offset + task.initialize(newTaskContextWithCurrentChangeId()) + + // create data (2), 100 nodes + 50 relationships + session.run("UNWIND RANGE(1, 50) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // start task with EARLIEST, previous changes should be visible + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.CDC.toString(), + SourceConfiguration.START_FROM to StartFrom.EARLIEST.toString(), + SourceConfiguration.IGNORE_STORED_OFFSET to "true", + "neo4j.cdc.topic.nodes" to "()", + "neo4j.cdc.topic.relationships" to "()-[]-()")) + + // poll for changes + val changes = task.poll() + + // expect to see previously created data + changes shouldHaveSize 100 * 2 + 100 + } + + @Test + fun `should ignore stored offset when startFrom=now`() { + // capture change identifier and set it as stored offset + task.initialize(newTaskContextWithCurrentChangeId()) + + // create data (1), 100 nodes + 50 relationships + session.run("UNWIND RANGE(1, 50) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // start task with NOW, previous 
changes should NOT be visible + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.CDC.toString(), + SourceConfiguration.START_FROM to StartFrom.NOW.toString(), + SourceConfiguration.IGNORE_STORED_OFFSET to "true", + "neo4j.cdc.topic.nodes" to "()", + "neo4j.cdc.topic.relationships" to "()-[]-()")) + + // create data (2), 100 nodes + 50 relationships + session.run("UNWIND RANGE(1, 50) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // poll for changes + val changes = task.poll() + + // expect to see only data (2), which was created after the task started + changes shouldHaveSize 50 * 2 + 50 + } + + @Test + fun `should ignore stored offset when startFrom=user provided`() { + // capture change identifier and set it as stored offset + task.initialize(newTaskContextWithCurrentChangeId()) + + // create data (1), 100 nodes + 50 relationships + session.run("UNWIND RANGE(1, 50) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // start task with USER_PROVIDED, previous changes should NOT be visible + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.CDC.toString(), + SourceConfiguration.START_FROM to StartFrom.USER_PROVIDED.toString(), + SourceConfiguration.START_FROM_VALUE to currentChangeId(), + SourceConfiguration.IGNORE_STORED_OFFSET to "true", + "neo4j.cdc.topic.nodes" to "()", + "neo4j.cdc.topic.relationships" to "()-[]-()")) + + // create data (2), 100 nodes + 50 relationships + session.run("UNWIND RANGE(1, 50) AS x CREATE (n), (m), (n)-[:RELATED_TO]->(m)").consume() + + // poll for changes + val changes = task.poll() + + // expect to see only data (2), which was created after the user-provided change identifier + changes shouldHaveSize 50 * 2 + 50 + } + + @Test + fun `should route change events based on matched selectors`() { + session.run("CREATE 
CONSTRAINT IF NOT EXISTS FOR (n:Person) REQUIRE n.id IS KEY").consume() + session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (n:Company) REQUIRE n.id IS KEY").consume() + session + .run("CREATE CONSTRAINT IF NOT EXISTS FOR ()-[r:WORKS_FOR]->() REQUIRE r.id IS KEY") + .consume() + + session + .run("UNWIND RANGE(1, 100) AS n CREATE (p:Person) SET p.id = n, p.name = 'name ' + n") + .consume() + session + .run("UNWIND RANGE(1, 25) AS n CREATE (p:Company) SET p.id = n, p.name = 'company ' + n") + .consume() + session + .run( + """ + UNWIND RANGE(1, 100, 2) AS n + MATCH (p:Person {id: n}) + MATCH (c:Company {id: n%25+1}) + CREATE (p)-[:WORKS_FOR {id: n, since: date()}]->(c) + """ + .trimIndent()) + .consume() + + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.CDC.toString(), + SourceConfiguration.START_FROM to StartFrom.EARLIEST.toString(), + "neo4j.cdc.topic.nodes" to "()", + "neo4j.cdc.topic.relationships" to "()-[]-()", + "neo4j.cdc.topic.people" to "(:Person)", + "neo4j.cdc.topic.people-no-id" to "(:Person {-id})", + "neo4j.cdc.topic.people-key" to "(:Person {id:5})", + "neo4j.cdc.topic.company" to "(:Company)", + "neo4j.cdc.topic.works_for" to "(:Person)-[:WORKS_FOR]->(:Company)", + "neo4j.cdc.topic.works_for-no-since" to "(:Person)-[:WORKS_FOR{-since}]->(:Company)", + "neo4j.cdc.topic.works_for-key" to "(:Person)-[:WORKS_FOR{id: 11}]->(:Company)", + "neo4j.cdc.topic.none" to "(:People),()-[:KNOWS]-()", + )) + + val changes = task.poll().toList() + + changes.filter { it.topic() == "nodes" } shouldHaveSize 125 + changes.filter { it.topic() == "relationships" } shouldHaveSize 50 + changes.filter { it.topic() == "people" } shouldHaveSize 100 + changes.filter { it.topic() == "people-no-id" } shouldHaveSize 100 + changes.filter { it.topic() == "people-key" } shouldHaveSize 1 + changes.filter { it.topic() == "company" } shouldHaveSize 
25 + changes.filter { it.topic() == "works_for" } shouldHaveSize 50 + changes.filter { it.topic() == "works_for-no-since" } shouldHaveSize 50 + changes.filter { it.topic() == "works_for-key" } shouldHaveSize 1 + changes.filter { it.topic() == "none" } shouldHaveSize 0 + } + + @Test + fun `batch size should be respected`() { + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.CDC.toString(), + SourceConfiguration.START_FROM to StartFrom.EARLIEST.toString(), + SourceConfiguration.BATCH_SIZE to "5", + "neo4j.cdc.topic.nodes" to "()")) + + session.run("UNWIND RANGE(1, 100) AS n CREATE ()").consume() + + // should return records in batches of `5` as configured + for (i in 1..20) { + val changes = task.poll().toList() + + changes shouldHaveSize 5 + } + + measureTime { + val changes = task.poll().toList() + + changes shouldHaveSize 0 + } shouldBeGreaterThan 5.seconds + } + + @Test + fun `poll duration should be respected`() { + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.CDC.toString(), + SourceConfiguration.START_FROM to StartFrom.EARLIEST.toString(), + SourceConfiguration.CDC_POLL_DURATION to "5s", + "neo4j.cdc.topic.nodes" to "()")) + + // with no changes available, poll should block for the configured CDC_POLL_DURATION (5s) and return nothing + measureTime { + val changes = task.poll().toList() + + changes shouldHaveSize 0 + } shouldBeGreaterThan 5.seconds + + session.run("UNWIND RANGE(1, 100) AS n CREATE ()").consume() + + // should return before CDC_POLL_DURATION elapses when changes are available + measureTime { + val changes = task.poll().toList() + + changes shouldHaveSize 100 + } shouldBeLessThan 5.seconds + } + + private fun newTaskContextWithCurrentChangeId(): SourceTaskContext { + return newTaskContextWithOffset(mapOf("value" to 
currentChangeId())) + } + + private fun newTaskContextWithOffset( + offsetMap: Map = emptyMap() + ): SourceTaskContext { + val offsetStorageReader = + mock { + on { offset(ArgumentMatchers.anyMap()) } doReturn offsetMap + } + + return mock { on { offsetStorageReader() } doReturn offsetStorageReader } + } +} diff --git a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt index ca8cb1463..730e5391b 100644 --- a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt +++ b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt @@ -17,8 +17,7 @@ package org.neo4j.connectors.kafka.source import io.kotest.matchers.collections.shouldContain -import io.kotest.matchers.collections.shouldExist -import io.kotest.matchers.collections.shouldMatchEach +import io.kotest.matchers.collections.shouldHaveSingleElement import io.kotest.matchers.shouldBe import org.junit.jupiter.api.Test import org.neo4j.connectors.kafka.configuration.Neo4jConfiguration @@ -33,11 +32,13 @@ class Neo4jConnectorTest { config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BASIC_USERNAME } - .errorMessages() shouldContain "Must be non-empty." + .errorMessages() shouldContain + "Invalid value for configuration neo4j.authentication.basic.username: Must not be blank." config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BASIC_PASSWORD } - .errorMessages() shouldContain "Must be non-empty." + .errorMessages() shouldContain + "Invalid value for configuration neo4j.authentication.basic.password: Must not be blank." config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BASIC_REALM } @@ -53,7 +54,8 @@ class Neo4jConnectorTest { config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_KERBEROS_TICKET } - .errorMessages() shouldContain "Must be non-empty." 
+ .errorMessages() shouldContain + "Invalid value for configuration neo4j.authentication.kerberos.ticket: Must not be blank." } @Test @@ -65,7 +67,8 @@ class Neo4jConnectorTest { config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_BEARER_TOKEN } - .errorMessages() shouldContain "Must be non-empty." + .errorMessages() shouldContain + "Invalid value for configuration neo4j.authentication.bearer.token: Must not be blank." } @Test @@ -77,15 +80,18 @@ class Neo4jConnectorTest { config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_SCHEME } - .errorMessages() shouldContain "Must be non-empty." + .errorMessages() shouldContain + "Invalid value for configuration neo4j.authentication.custom.scheme: Must not be blank." config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_PRINCIPAL } - .errorMessages() shouldContain "Must be non-empty." + .errorMessages() shouldContain + "Invalid value for configuration neo4j.authentication.custom.principal: Must not be blank." config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_CREDENTIALS } - .errorMessages() shouldContain "Must be non-empty." + .errorMessages() shouldContain + "Invalid value for configuration neo4j.authentication.custom.credentials: Must not be blank." config .configValues() .first { it.name() == Neo4jConfiguration.AUTHENTICATION_CUSTOM_REALM } @@ -106,7 +112,38 @@ class Neo4jConnectorTest { .configValues() .first { it.name() == SourceConfiguration.STRATEGY } .errorMessages() shouldContain - "Exactly one topic needs to be configured with pattern(s) describing the entities to query changes for. Please refer to documentation for more information." + "At least one topic needs to be configured with pattern(s) describing the entities to query changes for. Please refer to documentation for more information." 
+ } + + @Test + fun `should validate query with query strategy`() { + val connector = Neo4jConnector() + + connector + .validate( + mutableMapOf( + Neo4jConfiguration.URI to "neo4j://localhost", + SourceConfiguration.TOPIC to "topic", + SourceConfiguration.STRATEGY to "QUERY")) + .apply { + this.configValues() + .first { it.name() == SourceConfiguration.QUERY } + .errorMessages() shouldContain + "Invalid value for configuration neo4j.query: Must not be blank." + } + + connector + .validate( + mutableMapOf( + Neo4jConfiguration.URI to "neo4j://localhost", + SourceConfiguration.QUERY to "MATCH (n) RETURN n", + SourceConfiguration.STRATEGY to "QUERY")) + .apply { + this.configValues() + .first { it.name() == SourceConfiguration.TOPIC } + .errorMessages() shouldContain + "Invalid value for configuration topic: Must not be blank." + } } @Test @@ -137,7 +174,7 @@ class Neo4jConnectorTest { .apply { this.configValues() .first { it.name() == SourceConfiguration.STRATEGY } - .errorMessages() shouldExist + .errorMessages() shouldHaveSingleElement { it.startsWith("Invalid value (;ABC] for configuration neo4j.cdc.topic.topic-1:") } @@ -150,15 +187,43 @@ class Neo4jConnectorTest { Neo4jConfiguration.AUTHENTICATION_TYPE to "NONE", SourceConfiguration.STRATEGY to "CDC", "neo4j.cdc.topic.topic-1" to "(:Person),()-[:KNOWS]-()", - "neo4j.cdc.topic.topic-2.patterns" to "(:Person),()-[:KNOWS]-(:Company)")) + "neo4j.cdc.topic.topic-2.patterns" to "(:Person),()-[:KNOWS]-(;Company)")) .apply { this.configValues() .first { it.name() == SourceConfiguration.STRATEGY } - .errorMessages() shouldMatchEach - listOf { - !it.startsWith( - "Exactly one topic needs to be configured with pattern(s) describing the entities to query changes for.") + .errorMessages() shouldHaveSingleElement + { + it.startsWith("Invalid value (:Person),()-[:KNOWS]-(;Company)") } } } + + @Test + fun `should return Neo4jQueryTask for query strategy`() { + val connector = Neo4jConnector() + + connector.start( + 
mutableMapOf( + Neo4jConfiguration.URI to "neo4j://localhost", + Neo4jConfiguration.AUTHENTICATION_TYPE to "NONE", + SourceConfiguration.TOPIC to "my-topic", + SourceConfiguration.STRATEGY to "QUERY", + SourceConfiguration.QUERY to "MATCH (n) RETURN n.timestamp, n")) + + connector.taskClass() shouldBe Neo4jQueryTask::class.java + } + + @Test + fun `should return Neo4jCDCTask for cdc strategy`() { + val connector = Neo4jConnector() + + connector.start( + mutableMapOf( + Neo4jConfiguration.URI to "neo4j://localhost", + Neo4jConfiguration.AUTHENTICATION_TYPE to "NONE", + SourceConfiguration.STRATEGY to "CDC", + "neo4j.cdc.topic.topic-1" to "(:Person)")) + + connector.taskClass() shouldBe Neo4jCDCTask::class.java + } } diff --git a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceTaskTest.kt b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTaskTest.kt similarity index 99% rename from source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceTaskTest.kt rename to source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTaskTest.kt index 7c80d7962..668682ca4 100644 --- a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jSourceTaskTest.kt +++ b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTaskTest.kt @@ -40,7 +40,7 @@ import org.testcontainers.junit.jupiter.Container import org.testcontainers.junit.jupiter.Testcontainers @Testcontainers -class Neo4jSourceTaskTest { +class Neo4jQueryTaskTest { companion object { @Container @@ -77,7 +77,7 @@ class Neo4jSourceTaskTest { @BeforeEach fun before() { - task = Neo4jSourceTask() + task = Neo4jQueryTask() val sourceTaskContextMock = Mockito.mock(SourceTaskContext::class.java) val offsetStorageReader = Mockito.mock(OffsetStorageReader::class.java) Mockito.`when`(sourceTaskContextMock.offsetStorageReader()).thenReturn(offsetStorageReader) diff --git a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/SourceConfigurationTest.kt 
b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/SourceConfigurationTest.kt index cdd7797cf..387417e8c 100644 --- a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/SourceConfigurationTest.kt +++ b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/SourceConfigurationTest.kt @@ -16,14 +16,21 @@ */ package org.neo4j.connectors.kafka.source +import io.kotest.matchers.maps.shouldContainAll +import io.kotest.matchers.maps.shouldContainExactly +import io.kotest.matchers.shouldBe import io.kotest.matchers.throwable.shouldHaveMessage import kotlin.test.assertEquals import kotlin.test.assertFailsWith import kotlin.test.assertTrue import kotlin.time.Duration.Companion.minutes +import kotlin.time.Duration.Companion.seconds import org.apache.kafka.common.config.ConfigException import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertDoesNotThrow +import org.neo4j.cdc.client.selector.NodeSelector +import org.neo4j.cdc.client.selector.RelationshipNodeSelector +import org.neo4j.cdc.client.selector.RelationshipSelector import org.neo4j.connectors.kafka.configuration.Neo4jConfiguration class SourceConfigurationTest { @@ -46,18 +53,6 @@ class SourceConfigurationTest { "Invalid value none for configuration neo4j.source-strategy: Must be one of: 'QUERY', 'CDC'." } - assertFailsWith(ConfigException::class) { - SourceConfiguration( - mapOf( - Neo4jConfiguration.URI to "neo4j://localhost", - SourceConfiguration.TOPIC to "topic", - SourceConfiguration.STRATEGY to "QUERY")) - } - .also { - it shouldHaveMessage - "Missing required configuration \"neo4j.query\" which has no default value." - } - assertFailsWith(ConfigException::class) { SourceConfiguration( mapOf( @@ -69,18 +64,6 @@ class SourceConfigurationTest { "Invalid value none for configuration neo4j.source-strategy: Must be one of: 'QUERY', 'CDC'." 
} - assertFailsWith(ConfigException::class) { - SourceConfiguration( - mapOf( - Neo4jConfiguration.URI to "neo4j://localhost", - SourceConfiguration.STRATEGY to "QUERY", - SourceConfiguration.QUERY to "MATCH (n) RETURN n")) - } - .also { - it shouldHaveMessage - "Missing required configuration \"topic\" which has no default value." - } - assertFailsWith(ConfigException::class) { SourceConfiguration( mapOf( @@ -136,11 +119,11 @@ class SourceConfigurationTest { SourceConfiguration.START_FROM to "EARLIEST", SourceConfiguration.QUERY_POLL_INTERVAL to "1m", SourceConfiguration.QUERY_TIMEOUT to "5m", - SourceConfiguration.QUERY_BATCH_SIZE to "-1")) + SourceConfiguration.BATCH_SIZE to "-1")) } .also { it shouldHaveMessage - "Invalid value -1 for configuration neo4j.query.batch-size: Value must be at least 1" + "Invalid value -1 for configuration neo4j.batch-size: Value must be at least 1" } assertFailsWith(ConfigException::class) { @@ -153,7 +136,7 @@ class SourceConfigurationTest { SourceConfiguration.START_FROM to "EARLIEST", SourceConfiguration.QUERY_POLL_INTERVAL to "1m", SourceConfiguration.QUERY_TIMEOUT to "5m", - SourceConfiguration.QUERY_BATCH_SIZE to "50", + SourceConfiguration.BATCH_SIZE to "50", SourceConfiguration.ENFORCE_SCHEMA to "disabled")) } .also { @@ -171,7 +154,7 @@ class SourceConfigurationTest { SourceConfiguration.START_FROM to "USER_PROVIDED", SourceConfiguration.QUERY_POLL_INTERVAL to "1m", SourceConfiguration.QUERY_TIMEOUT to "5m", - SourceConfiguration.QUERY_BATCH_SIZE to "50", + SourceConfiguration.BATCH_SIZE to "50", SourceConfiguration.ENFORCE_SCHEMA to "disabled")) } .also { @@ -192,7 +175,7 @@ class SourceConfigurationTest { SourceConfiguration.START_FROM to "EARLIEST", SourceConfiguration.QUERY_POLL_INTERVAL to "1m", SourceConfiguration.QUERY_TIMEOUT to "5m", - SourceConfiguration.QUERY_BATCH_SIZE to "50", + SourceConfiguration.BATCH_SIZE to "50", SourceConfiguration.ENFORCE_SCHEMA to "true")) assertEquals(SourceType.QUERY, 
config.strategy) @@ -202,7 +185,7 @@ class SourceConfigurationTest { assertEquals(StartFrom.EARLIEST, config.startFrom) assertEquals(1.minutes, config.queryPollingInterval) assertEquals(5.minutes, config.queryTimeout) - assertEquals(50, config.queryBatchSize) + assertEquals(50, config.batchSize) assertTrue(config.enforceSchema) } @@ -219,7 +202,7 @@ class SourceConfigurationTest { SourceConfiguration.START_FROM to "EARLIEST", SourceConfiguration.QUERY_POLL_INTERVAL to "1m", SourceConfiguration.QUERY_TIMEOUT to "5m", - SourceConfiguration.QUERY_BATCH_SIZE to "50", + SourceConfiguration.BATCH_SIZE to "50", SourceConfiguration.ENFORCE_SCHEMA to "true")) assertEquals(SourceType.QUERY, config.strategy) @@ -229,7 +212,75 @@ class SourceConfigurationTest { assertEquals(StartFrom.EARLIEST, config.startFrom) assertEquals(1.minutes, config.queryPollingInterval) assertEquals(5.minutes, config.queryTimeout) - assertEquals(50, config.queryBatchSize) + assertEquals(50, config.batchSize) assertTrue(config.enforceSchema) } + + @Test + fun `valid config with cdc to single topic`() { + val config = + SourceConfiguration( + mapOf( + Neo4jConfiguration.URI to "neo4j://localhost", + SourceConfiguration.STRATEGY to "CDC", + SourceConfiguration.START_FROM to "EARLIEST", + SourceConfiguration.BATCH_SIZE to "10000", + SourceConfiguration.ENFORCE_SCHEMA to "true", + SourceConfiguration.CDC_POLL_INTERVAL to "5s", + "neo4j.cdc.topic.topic-1" to "(),()-[]-()")) + + config.strategy shouldBe SourceType.CDC + config.startFrom shouldBe StartFrom.EARLIEST + config.batchSize shouldBe 10000 + config.cdcPollingInterval shouldBe 5.seconds + config.enforceSchema shouldBe true + config.cdcSelectorsToTopics shouldContainExactly + mapOf( + NodeSelector(null, emptySet(), emptySet(), emptyMap()) to listOf("topic-1"), + RelationshipSelector( + null, + emptySet(), + null, + RelationshipNodeSelector(emptySet(), emptyMap()), + RelationshipNodeSelector(emptySet(), emptyMap()), + emptyMap()) to listOf("topic-1")) 
+ } + + @Test + fun `valid config with cdc to multiple topics`() { + val config = + SourceConfiguration( + mapOf( + Neo4jConfiguration.URI to "neo4j://localhost", + SourceConfiguration.STRATEGY to "CDC", + SourceConfiguration.START_FROM to "EARLIEST", + SourceConfiguration.BATCH_SIZE to "10000", + SourceConfiguration.ENFORCE_SCHEMA to "true", + SourceConfiguration.CDC_POLL_INTERVAL to "5s", + "neo4j.cdc.topic.people" to "(:Person)", + "neo4j.cdc.topic.company" to "(:Company)", + "neo4j.cdc.topic.works_for" to "(:Person)-[:WORKS_FOR]->(:Company)", + "neo4j.cdc.topic.topic-1" to "(:Person)", + "neo4j.cdc.topic.topic-2" to "(:Person {id})")) + + config.strategy shouldBe SourceType.CDC + config.startFrom shouldBe StartFrom.EARLIEST + config.batchSize shouldBe 10000 + config.cdcPollingInterval shouldBe 5.seconds + config.enforceSchema shouldBe true + config.cdcSelectorsToTopics shouldContainAll + mapOf( + NodeSelector(null, emptySet(), setOf("Person"), emptyMap()) to + listOf("people", "topic-1"), + NodeSelector(null, emptySet(), setOf("Person"), emptyMap(), setOf("id"), emptySet()) to + listOf("topic-2"), + NodeSelector(null, emptySet(), setOf("Company"), emptyMap()) to listOf("company"), + RelationshipSelector( + null, + emptySet(), + "WORKS_FOR", + RelationshipNodeSelector(setOf("Person"), emptyMap()), + RelationshipNodeSelector(setOf("Company"), emptyMap()), + emptyMap()) to listOf("works_for")) + } } From 5077b3328081ca115e1a9b31f74719bb2e86f696 Mon Sep 17 00:00:00 2001 From: Ali Ince Date: Tue, 17 Oct 2023 15:56:20 +0100 Subject: [PATCH 3/4] build: use released version of cdc client --- source/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/pom.xml b/source/pom.xml index 7a3b8101d..87b945977 100644 --- a/source/pom.xml +++ b/source/pom.xml @@ -12,7 +12,7 @@ source Neo4j Connector for Kafka - Source - 1.0.4-SNAPSHOT + 1.0.3 From 871a2fdd3a74bc80bdaa922db7e9ed5f79abcdb5 Mon Sep 17 00:00:00 2001 From: Ali Ince Date: Wed, 18 Oct 2023 
11:28:35 +0100 Subject: [PATCH 4/4] chore: address review comments --- .../kafka/configuration/Neo4jConfiguration.kt | 2 +- .../kafka/configuration/helpers/Validators.kt | 60 +-- .../configuration/helpers/ValidatorsTest.kt | 44 +- .../connectors/kafka/sink/Neo4jConnector.kt | 2 +- .../kafka/sink/SinkConfiguration.kt | 7 +- .../{Neo4jCDCTask.kt => Neo4jCdcTask.kt} | 10 +- .../connectors/kafka/source/Neo4jConnector.kt | 2 +- .../kafka/source/Neo4jQueryService.kt | 60 ++- .../connectors/kafka/source/Neo4jQueryTask.kt | 8 +- .../kafka/source/SourceConfiguration.kt | 11 +- ...eo4jCDCTaskTest.kt => Neo4jCdcTaskTest.kt} | 4 +- .../kafka/source/Neo4jConnectorTest.kt | 2 +- .../kafka/source/Neo4jQueryTaskTest.kt | 397 +++++++++++++++++- .../source/Neo4jSourceConnectorTest.kt | 42 -- 14 files changed, 477 insertions(+), 174 deletions(-) rename source/src/main/kotlin/org/neo4j/connectors/kafka/source/{Neo4jCDCTask.kt => Neo4jCdcTask.kt} (94%) rename source/src/test/kotlin/org/neo4j/connectors/kafka/source/{Neo4jCDCTaskTest.kt => Neo4jCdcTaskTest.kt} (99%) delete mode 100644 source/src/test/kotlin/streams/kafka/connect/source/Neo4jSourceConnectorTest.kt diff --git a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfiguration.kt b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfiguration.kt index 525f19555..465a597a4 100644 --- a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfiguration.kt +++ b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/Neo4jConfiguration.kt @@ -278,7 +278,7 @@ open class Neo4jConfiguration(configDef: ConfigDef, originals: Map<*, *>, val ty } /** Perform validation on dependent configuration items */ - fun validate(config: org.apache.kafka.common.config.Config, originals: Map) { + fun validate(config: org.apache.kafka.common.config.Config) { // authentication configuration config.validateNonEmptyIfVisible(AUTHENTICATION_BASIC_USERNAME) 
config.validateNonEmptyIfVisible(AUTHENTICATION_BASIC_PASSWORD) diff --git a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt index e03f4ffb2..38203ad92 100644 --- a/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt +++ b/common/src/main/kotlin/org/neo4j/connectors/kafka/configuration/helpers/Validators.kt @@ -53,7 +53,7 @@ object Validators { fun blank(): ConfigDef.Validator { return ConfigDef.Validator { name, value -> if (value is String) { - if (value.isNotEmpty()) { + if (value.isNotBlank()) { throw ConfigException(name, value, "Must be blank.") } } else if (value is List<*>) { @@ -66,10 +66,10 @@ object Validators { } } - fun notBlank(): ConfigDef.Validator { + fun notBlankOrEmpty(): ConfigDef.Validator { return ConfigDef.Validator { name, value -> if (value is String) { - if (value.isEmpty()) { + if (value.isBlank()) { throw ConfigException(name, value, "Must not be blank.") } } else if (value is List<*>) { @@ -129,12 +129,10 @@ object Validators { fun uri(vararg schemes: String): ConfigDef.Validator { return object : ConfigDef.Validator { override fun ensureValid(name: String?, value: Any?) { + notBlankOrEmpty().ensureValid(name, value) + when (value) { is String -> { - if (value.isBlank()) { - throw ConfigException(name, value, "Must not be blank.") - } - try { val parsed = URI(value) @@ -147,15 +145,8 @@ object Validators { } } is List<*> -> { - if (value.isEmpty()) { - throw ConfigException(name, value, "Must not be blank.") - } - value.forEach { ensureValid(name, it) } } - else -> { - throw ConfigException(name, value, "Must be a String or a List.") - } } } } @@ -164,32 +155,27 @@ object Validators { fun file(readable: Boolean = true, writable: Boolean = false): ConfigDef.Validator { return object : ConfigDef.Validator { override fun ensureValid(name: String?, value: Any?) 
{ - if (value is String) { - if (value.isBlank()) { - throw ConfigException(name, value, "Must not be blank.") - } + notBlankOrEmpty().ensureValid(name, value) - val file = File(value) - if (!file.isAbsolute) { - throw ConfigException(name, value, "Must be an absolute path.") - } - if (!file.isFile) { - throw ConfigException(name, value, "Must be a file.") - } - if (readable && !file.canRead()) { - throw ConfigException(name, value, "Must be readable.") - } - if (writable && !file.canWrite()) { - throw ConfigException(name, value, "Must be writable.") + when (value) { + is String -> { + val file = File(value) + if (!file.isAbsolute) { + throw ConfigException(name, value, "Must be an absolute path.") + } + if (!file.isFile) { + throw ConfigException(name, value, "Must be a file.") + } + if (readable && !file.canRead()) { + throw ConfigException(name, value, "Must be readable.") + } + if (writable && !file.canWrite()) { + throw ConfigException(name, value, "Must be writable.") + } } - } else if (value is List<*>) { - if (value.isEmpty()) { - throw ConfigException(name, value, "Must not be blank.") + is List<*> -> { + value.forEach { ensureValid(name, it) } } - - value.forEach { ensureValid(name, it) } - } else { - throw ConfigException(name, value, "Must be a String or a List.") } } } diff --git a/common/src/test/kotlin/org/neo4j/connectors/kafka/configuration/helpers/ValidatorsTest.kt b/common/src/test/kotlin/org/neo4j/connectors/kafka/configuration/helpers/ValidatorsTest.kt index 71ad41ef0..49fd69eba 100644 --- a/common/src/test/kotlin/org/neo4j/connectors/kafka/configuration/helpers/ValidatorsTest.kt +++ b/common/src/test/kotlin/org/neo4j/connectors/kafka/configuration/helpers/ValidatorsTest.kt @@ -234,15 +234,21 @@ class ValidatorsTest { } } - listOf("", listOf()).forEach { v -> - assertFailsWith(ConfigException::class) { - Validators.uri().apply { this.ensureValid("my.property", v) } - } - .also { - assertEquals( - "Invalid value $v for configuration 
my.property: Must not be blank.", it.message) - } - } + assertFailsWith(ConfigException::class) { + Validators.uri().apply { this.ensureValid("my.property", "") } + } + .also { + assertEquals( + "Invalid value for configuration my.property: Must not be blank.", it.message) + } + + assertFailsWith(ConfigException::class) { + Validators.uri().apply { this.ensureValid("my.property", listOf()) } + } + .also { + assertEquals( + "Invalid value [] for configuration my.property: Must not be empty.", it.message) + } assertFailsWith(ConfigException::class) { Validators.uri().apply { this.ensureValid("my.property", "fxz:\\sab.set") } @@ -284,13 +290,19 @@ class ValidatorsTest { } } - listOf("", listOf()).forEach { v -> - assertFailsWith(ConfigException::class) { Validators.file().ensureValid("my.property", v) } - .also { - assertEquals( - "Invalid value $v for configuration my.property: Must not be blank.", it.message) - } - } + assertFailsWith(ConfigException::class) { Validators.file().ensureValid("my.property", "") } + .also { + assertEquals( + "Invalid value for configuration my.property: Must not be blank.", it.message) + } + + assertFailsWith(ConfigException::class) { + Validators.file().ensureValid("my.property", listOf()) + } + .also { + assertEquals( + "Invalid value [] for configuration my.property: Must not be empty.", it.message) + } assertFailsWith(ConfigException::class) { Validators.file().ensureValid("my.property", "deneme.txt") diff --git a/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnector.kt b/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnector.kt index b079764e6..7d0140023 100644 --- a/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnector.kt +++ b/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/Neo4jConnector.kt @@ -43,7 +43,7 @@ class Neo4jConnector : SinkConnector() { val originals = connectorConfigs ?: emptyMap() val result = super.validate(originals) - SinkConfiguration.validate(result, originals) + 
SinkConfiguration.validate(result) return result } diff --git a/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/SinkConfiguration.kt b/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/SinkConfiguration.kt index 51e12e95e..dd72acd14 100644 --- a/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/SinkConfiguration.kt +++ b/sink/src/main/kotlin/org/neo4j/connectors/kafka/sink/SinkConfiguration.kt @@ -124,11 +124,8 @@ class SinkConfiguration(originals: Map<*, *>) : return migrated } - internal fun validate( - config: org.apache.kafka.common.config.Config, - originals: Map - ) { - Neo4jConfiguration.validate(config, originals) + internal fun validate(config: org.apache.kafka.common.config.Config) { + Neo4jConfiguration.validate(config) } fun config(): ConfigDef = diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTask.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jCdcTask.kt similarity index 94% rename from source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTask.kt rename to source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jCdcTask.kt index 992680163..876ffea00 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTask.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jCdcTask.kt @@ -41,8 +41,8 @@ import org.neo4j.driver.SessionConfig import org.slf4j.Logger import org.slf4j.LoggerFactory -class Neo4jCDCTask : SourceTask() { - private val log: Logger = LoggerFactory.getLogger(Neo4jCDCTask::class.java) +class Neo4jCdcTask : SourceTask() { + private val log: Logger = LoggerFactory.getLogger(Neo4jCdcTask::class.java) private lateinit var settings: Map private lateinit var config: SourceConfiguration @@ -140,7 +140,7 @@ class Neo4jCDCTask : SourceTask() { private fun resumeFrom(config: SourceConfiguration, cdc: CDCService): String { val offset = context.offsetStorageReader().offset(config.partition) ?: emptyMap() - if (!config.ignoreStoredOffset && 
offset["value"] != null && offset["value"] is String) { + if (!config.ignoreStoredOffset && offset["value"] is String) { log.debug("previously stored offset is {}", offset["value"]) return offset["value"] as String } @@ -152,9 +152,11 @@ class Neo4jCDCTask : SourceTask() { StartFrom.USER_PROVIDED -> config.startFromCustom } log.debug( - "{} is set as {}, offset to resume from is {}", + "{} is set as {} ({} = {}), offset to resume from is {}", SourceConfiguration.START_FROM, config.startFrom, + SourceConfiguration.IGNORE_STORED_OFFSET, + config.ignoreStoredOffset, value) return value } diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt index 630a3bc09..4490ca77e 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnector.kt @@ -38,7 +38,7 @@ class Neo4jConnector : SourceConnector() { override fun taskClass(): Class = when (config.strategy) { - SourceType.CDC -> Neo4jCDCTask::class.java + SourceType.CDC -> Neo4jCdcTask::class.java SourceType.QUERY -> Neo4jQueryTask::class.java } diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryService.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryService.kt index 556e5bd97..3852731cd 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryService.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryService.kt @@ -42,7 +42,7 @@ import org.slf4j.LoggerFactory class Neo4jQueryService( private val config: SourceConfiguration, - offsetStorageReader: OffsetStorageReader + private val offsetStorageReader: OffsetStorageReader ) : AutoCloseable { private val log: Logger = LoggerFactory.getLogger(Neo4jQueryService::class.java) @@ -54,36 +54,7 @@ class Neo4jQueryService( private val isClose = AtomicBoolean() - private val 
currentOffset: AtomicLong by lazy { - val offset = offsetStorageReader.offset(sourcePartition) ?: emptyMap() - - val startValue = - if (offset["value"] != null && offset["property"] == config.queryStreamingProperty) { - log.info( - "Resuming from offset $offset, '${config.startFrom}' specified for configuration '${SourceConfiguration.START_FROM}' is ignored.") - offset["value"] as Long - } else { - when (config.startFrom) { - StartFrom.EARLIEST -> { - log.info( - "No offset has been found and '${config.startFrom}' for configuration '${SourceConfiguration.START_FROM}' will be used.") - (-1) - } - StartFrom.NOW -> { - log.info( - "No offset has been found and '${config.startFrom}' for configuration '${SourceConfiguration.START_FROM}' will be used.") - System.currentTimeMillis() - } - StartFrom.USER_PROVIDED -> { - val provided = config.startFromCustom.toLong() - log.info( - "No offset has been found and '${config.startFrom}' for configuration '${SourceConfiguration.START_FROM}' will be used with a starting offset value '${provided}'.") - provided - } - } - } - AtomicLong(startValue) - } + private val currentOffset: AtomicLong = AtomicLong(resumeFrom()) private val pollInterval = config.queryPollingInterval.inWholeMilliseconds private val isStreamingPropertyDefined = config.queryStreamingProperty.isNotBlank() @@ -208,4 +179,31 @@ class Neo4jQueryService( config.close() log.info("Neo4j Source Service closed successfully") } + + private fun resumeFrom(): Long { + val offset = offsetStorageReader.offset(config.partition) ?: emptyMap() + + if (!config.ignoreStoredOffset && + offset["value"] is Long && + offset["property"] == config.queryStreamingProperty) { + log.debug("previously stored offset is {}", offset["value"]) + return offset["value"] as Long + } + + val value = + when (config.startFrom) { + StartFrom.EARLIEST -> (-1) + StartFrom.NOW -> System.currentTimeMillis() + StartFrom.USER_PROVIDED -> config.startFromCustom.toLong() + } + + log.debug( + "{} is set as {} 
({} = {}), offset to resume from is {}", + SourceConfiguration.START_FROM, + config.startFrom, + SourceConfiguration.IGNORE_STORED_OFFSET, + config.ignoreStoredOffset, + value) + return value + } } diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTask.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTask.kt index 612864492..77e800691 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTask.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTask.kt @@ -27,7 +27,7 @@ import org.slf4j.LoggerFactory class Neo4jQueryTask : SourceTask() { private lateinit var props: Map private lateinit var config: SourceConfiguration - private lateinit var neo4JQueryService: Neo4jQueryService + private lateinit var neo4jQueryService: Neo4jQueryService private val log: Logger = LoggerFactory.getLogger(Neo4jQueryTask::class.java) @@ -36,15 +36,15 @@ class Neo4jQueryTask : SourceTask() { override fun start(props: MutableMap?) { this.props = props!! config = SourceConfiguration(this.props) - neo4JQueryService = Neo4jQueryService(config, context.offsetStorageReader()) + neo4jQueryService = Neo4jQueryService(config, context.offsetStorageReader()) } @DelicateCoroutinesApi override fun stop() { log.info("Stop() - Closing Neo4j Source Service.") StreamsUtils.ignoreExceptions( - { neo4JQueryService.close() }, UninitializedPropertyAccessException::class.java) + { neo4jQueryService.close() }, UninitializedPropertyAccessException::class.java) } - override fun poll(): List? = neo4JQueryService.poll() + override fun poll(): List? 
= neo4jQueryService.poll() } diff --git a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt index 281ff71ff..21ee8af6c 100644 --- a/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt +++ b/source/src/main/kotlin/org/neo4j/connectors/kafka/source/SourceConfiguration.kt @@ -40,8 +40,6 @@ import org.neo4j.connectors.kafka.configuration.helpers.Validators import org.neo4j.connectors.kafka.configuration.helpers.Validators.validateNonEmptyIfVisible import org.neo4j.connectors.kafka.configuration.helpers.parseSimpleString import org.neo4j.connectors.kafka.configuration.helpers.toSimpleString -import org.neo4j.connectors.kafka.source.DeprecatedNeo4jSourceConfiguration.Companion.ENFORCE_SCHEMA -import org.neo4j.connectors.kafka.source.DeprecatedNeo4jSourceConfiguration.Companion.TOPIC import org.neo4j.connectors.kafka.utils.PropertiesUtil import org.neo4j.driver.TransactionConfig @@ -248,11 +246,8 @@ class SourceConfiguration(originals: Map<*, *>) : return migrated } - internal fun validate( - config: org.apache.kafka.common.config.Config, - originals: Map - ) { - Neo4jConfiguration.validate(config, originals) + internal fun validate(config: Config, originals: Map) { + Neo4jConfiguration.validate(config) // START_FROM user defined validation config.validateNonEmptyIfVisible(START_FROM_VALUE) @@ -280,7 +275,7 @@ class SourceConfiguration(originals: Map<*, *>) : cdcTopics.forEach { // parse & validate CDC patterns try { - Validators.notBlank().ensureValid(it.key, it.value) + Validators.notBlankOrEmpty().ensureValid(it.key, it.value) try { Pattern.parse(it.value as String?) 
diff --git a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTaskTest.kt b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jCdcTaskTest.kt similarity index 99% rename from source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTaskTest.kt rename to source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jCdcTaskTest.kt index baa04f561..5a9a689b3 100644 --- a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jCDCTaskTest.kt +++ b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jCdcTaskTest.kt @@ -45,7 +45,7 @@ import org.testcontainers.junit.jupiter.Container import org.testcontainers.junit.jupiter.Testcontainers @Testcontainers -class Neo4jCDCTaskTest { +class Neo4jCdcTaskTest { companion object { @Container val neo4j: Neo4jContainer<*> = @@ -95,7 +95,7 @@ class Neo4jCDCTaskTest { mapOf("db" to "neo4j", "mode" to "FULL")) .consume() - task = Neo4jCDCTask() + task = Neo4jCdcTask() task.initialize(newTaskContextWithOffset()) } diff --git a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt index 730e5391b..0dd3393f4 100644 --- a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt +++ b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jConnectorTest.kt @@ -224,6 +224,6 @@ class Neo4jConnectorTest { SourceConfiguration.STRATEGY to "CDC", "neo4j.cdc.topic.topic-1" to "(:Person)")) - connector.taskClass() shouldBe Neo4jCDCTask::class.java + connector.taskClass() shouldBe Neo4jCdcTask::class.java } } diff --git a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTaskTest.kt b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTaskTest.kt index 668682ca4..416c62715 100644 --- a/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTaskTest.kt +++ 
b/source/src/test/kotlin/org/neo4j/connectors/kafka/source/Neo4jQueryTaskTest.kt @@ -16,9 +16,16 @@ */ package org.neo4j.connectors.kafka.source +import io.kotest.matchers.collections.shouldHaveSize +import java.time.Clock +import java.time.Duration +import java.time.Instant +import java.time.ZoneId import java.util.* import java.util.concurrent.TimeUnit import kotlin.test.assertFailsWith +import kotlin.time.Duration.Companion.seconds +import kotlin.time.toJavaDuration import org.apache.kafka.connect.data.Struct import org.apache.kafka.connect.errors.ConnectException import org.apache.kafka.connect.source.SourceRecord @@ -27,7 +34,11 @@ import org.apache.kafka.connect.source.SourceTaskContext import org.apache.kafka.connect.storage.OffsetStorageReader import org.awaitility.Awaitility.await import org.junit.jupiter.api.* -import org.mockito.Mockito +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.EnumSource +import org.mockito.ArgumentMatchers +import org.mockito.kotlin.doReturn +import org.mockito.kotlin.mock import org.neo4j.connectors.kafka.configuration.AuthenticationType import org.neo4j.connectors.kafka.configuration.Neo4jConfiguration import org.neo4j.connectors.kafka.utils.JSONUtils @@ -78,11 +89,7 @@ class Neo4jQueryTaskTest { @BeforeEach fun before() { task = Neo4jQueryTask() - val sourceTaskContextMock = Mockito.mock(SourceTaskContext::class.java) - val offsetStorageReader = Mockito.mock(OffsetStorageReader::class.java) - Mockito.`when`(sourceTaskContextMock.offsetStorageReader()).thenReturn(offsetStorageReader) - Mockito.`when`(offsetStorageReader.offset(Mockito.anyMap())).thenReturn(emptyMap()) - task.initialize(sourceTaskContextMock) + task.initialize(newTaskContextWithOffset(emptyMap())) } private fun structToMap(struct: Struct): Map = @@ -100,6 +107,241 @@ class Neo4jQueryTaskTest { fun Struct.toMap() = structToMap(this) + @Test + fun `should use correct offset when startFrom=earliest`() { + // create data 
with timestamp set as 0 + insertRecords(50, Clock.fixed(Instant.EPOCH, ZoneId.systemDefault())) + // create data with timestamp set as NOW + 5m + insertRecords( + 100, Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + + // start task with EARLIEST, previous changes should be visible + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.QUERY.toString(), + SourceConfiguration.START_FROM to StartFrom.EARLIEST.toString(), + SourceConfiguration.TOPIC to UUID.randomUUID().toString(), + SourceConfiguration.QUERY to getSourceQuery())) + + // poll for changes + val changes = mutableListOf() + await().atMost(30.seconds.toJavaDuration()).untilAsserted { + task.poll()?.let { changes.addAll(it) } + + // expect to see previously created data + changes shouldHaveSize 150 + } + } + + @Test + fun `should use correct offset when startFrom=now`() { + // create data with timestamp set as NOW - 5m + insertRecords( + 100, Clock.fixed(Instant.now().minus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + // create data with timestamp set as NOW + 5m + insertRecords( + 75, Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + + // start task with NOW, previous changes should NOT be visible + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.QUERY.toString(), + SourceConfiguration.START_FROM to StartFrom.NOW.toString(), + SourceConfiguration.TOPIC to UUID.randomUUID().toString(), + SourceConfiguration.QUERY to getSourceQuery())) + + // poll for changes + val changes = mutableListOf() + await().atMost(30.seconds.toJavaDuration()).untilAsserted { + task.poll()?.let { changes.addAll(it) } + + // expect to see only the data created after task is started + changes 
shouldHaveSize 75 + } + } + + @Test + fun `should use correct offset when startFrom=user provided`() { + // create data with timestamp set as NOW - 5m + insertRecords( + 10, Clock.fixed(Instant.now().minus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + // create data with timestamp set as NOW + 5m + insertRecords( + 25, Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + // create data with timestamp set as NOW + 10m + insertRecords( + 75, Clock.fixed(Instant.now().plus(Duration.ofMinutes(10)), ZoneId.systemDefault())) + + // start task with NOW, previous changes should NOT be visible + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.QUERY.toString(), + SourceConfiguration.START_FROM to StartFrom.USER_PROVIDED.toString(), + SourceConfiguration.START_FROM_VALUE to + Instant.now().plus(Duration.ofMinutes(7)).toEpochMilli().toString(), + SourceConfiguration.TOPIC to UUID.randomUUID().toString(), + SourceConfiguration.QUERY to getSourceQuery())) + + // poll for changes + val changes = mutableListOf() + await().atMost(30.seconds.toJavaDuration()).untilAsserted { + task.poll()?.let { changes.addAll(it) } + + // expect to see only the data created after task is started + changes shouldHaveSize 75 + } + } + + @ParameterizedTest + @EnumSource(StartFrom::class) + fun `should use stored offset regardless of provided startFrom`(startFrom: StartFrom) { + // create data with timestamp set as NOW - 5m + insertRecords( + 25, Clock.fixed(Instant.now().minus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + // create data with timestamp set as NOW - 2m + insertRecords( + 25, Clock.fixed(Instant.now().minus(Duration.ofMinutes(2)), ZoneId.systemDefault())) + // create data with timestamp set as NOW + 2m + insertRecords( + 25, Clock.fixed(Instant.now().plus(Duration.ofMinutes(2)), ZoneId.systemDefault())) + + // 
set an offset of NOW - 3m + task.initialize( + newTaskContextWithOffset( + "timestamp", Instant.now().minus(Duration.ofMinutes(3)).toEpochMilli())) + + // start task with provided START_FROM, with the mocked task context + task.start( + buildMap { + put(Neo4jConfiguration.URI, neo4j.boltUrl) + put(Neo4jConfiguration.AUTHENTICATION_TYPE, AuthenticationType.NONE.toString()) + put(SourceConfiguration.STRATEGY, SourceType.QUERY.toString()) + put(SourceConfiguration.TOPIC, UUID.randomUUID().toString()) + put(SourceConfiguration.QUERY, getSourceQuery()) + put(SourceConfiguration.QUERY_STREAMING_PROPERTY, "timestamp") + + put(SourceConfiguration.START_FROM, startFrom.toString()) + if (startFrom == StartFrom.USER_PROVIDED) { + put(SourceConfiguration.START_FROM_VALUE, "-1") + } + }) + + // poll for changes + val changes = mutableListOf() + await().atMost(30.seconds.toJavaDuration()).untilAsserted { + task.poll()?.let { changes.addAll(it) } + + // expect to see only the data created after task is started + changes shouldHaveSize 50 + } + } + + @Test + fun `should ignore stored offset when startFrom=earliest`() { + // create data with timestamp set as 0 + insertRecords(50, Clock.fixed(Instant.EPOCH, ZoneId.systemDefault())) + // create data with timestamp set as NOW + 5m + insertRecords( + 100, Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + + task.initialize(newTaskContextWithOffset("", Instant.now().toEpochMilli())) + + // start task with EARLIEST, previous changes should be visible + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.QUERY.toString(), + SourceConfiguration.START_FROM to StartFrom.EARLIEST.toString(), + SourceConfiguration.TOPIC to UUID.randomUUID().toString(), + SourceConfiguration.QUERY to getSourceQuery(), + SourceConfiguration.IGNORE_STORED_OFFSET to "true")) + + // poll for changes 
+ val changes = mutableListOf() + await().atMost(30.seconds.toJavaDuration()).untilAsserted { + task.poll()?.let { changes.addAll(it) } + + // expect to see previously created data + changes shouldHaveSize 150 + } + } + + @Test + fun `should ignore stored offset when startFrom=now`() { + // create data with timestamp set as NOW - 5m + insertRecords( + 100, Clock.fixed(Instant.now().minus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + // create data with timestamp set as NOW + 5m + insertRecords( + 75, Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + + task.initialize(newTaskContextWithOffset("", Instant.EPOCH.toEpochMilli())) + + // start task with NOW, previous changes should NOT be visible + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.QUERY.toString(), + SourceConfiguration.START_FROM to StartFrom.NOW.toString(), + SourceConfiguration.TOPIC to UUID.randomUUID().toString(), + SourceConfiguration.QUERY to getSourceQuery(), + SourceConfiguration.IGNORE_STORED_OFFSET to "true")) + + // poll for changes + val changes = mutableListOf() + await().atMost(30.seconds.toJavaDuration()).untilAsserted { + task.poll()?.let { changes.addAll(it) } + + // expect to see only the data created after task is started + changes shouldHaveSize 75 + } + } + + @Test + fun `should ignore stored offset when startFrom=user provided`() { + // create data with timestamp set as NOW - 5m + insertRecords( + 10, Clock.fixed(Instant.now().minus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + // create data with timestamp set as NOW + 5m + insertRecords( + 25, Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + // create data with timestamp set as NOW + 10m + insertRecords( + 75, Clock.fixed(Instant.now().plus(Duration.ofMinutes(10)), ZoneId.systemDefault())) + + 
task.initialize(newTaskContextWithOffset("", Instant.EPOCH.toEpochMilli())) + + // start task with NOW, previous changes should NOT be visible + task.start( + mapOf( + Neo4jConfiguration.URI to neo4j.boltUrl, + Neo4jConfiguration.AUTHENTICATION_TYPE to AuthenticationType.NONE.toString(), + SourceConfiguration.STRATEGY to SourceType.QUERY.toString(), + SourceConfiguration.START_FROM to StartFrom.USER_PROVIDED.toString(), + SourceConfiguration.START_FROM_VALUE to + Instant.now().plus(Duration.ofMinutes(7)).toEpochMilli().toString(), + SourceConfiguration.TOPIC to UUID.randomUUID().toString(), + SourceConfiguration.QUERY to getSourceQuery(), + SourceConfiguration.IGNORE_STORED_OFFSET to "true")) + + // poll for changes + val changes = mutableListOf() + await().atMost(30.seconds.toJavaDuration()).untilAsserted { + task.poll()?.let { changes.addAll(it) } + + // expect to see only the data created after task is started + changes shouldHaveSize 75 + } + } + @Test fun `should source data from Neo4j with custom QUERY from NOW`() { val props = mutableMapOf() @@ -110,12 +352,16 @@ class Neo4jQueryTaskTest { props[SourceConfiguration.QUERY] = getSourceQuery() props[Neo4jConfiguration.AUTHENTICATION_TYPE] = AuthenticationType.NONE.toString() + val expected = + insertRecords( + 10, + Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault()), + true) + task.start(props) - val totalRecords = 10 - val expected = insertRecords(totalRecords, true) val list = mutableListOf() - await().atMost(60, TimeUnit.SECONDS).until { + await().atMost(30, TimeUnit.SECONDS).until { task.poll()?.let { list.addAll(it) } val actualList = list.map { JSONUtils.readValue>(it.value()) } expected.containsAll(actualList) @@ -133,12 +379,14 @@ class Neo4jQueryTaskTest { props[SourceConfiguration.QUERY] = getSourceQuery() props[Neo4jConfiguration.AUTHENTICATION_TYPE] = AuthenticationType.NONE.toString() + val expected = + insertRecords( + 10, 
Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault())) + task.start(props) - val totalRecords = 10 - val expected = insertRecords(totalRecords) val list = mutableListOf() - await().atMost(60, TimeUnit.SECONDS).until { + await().atMost(30, TimeUnit.SECONDS).until { task.poll()?.let { list.addAll(it) } val actualList = list.map { (it.value() as Struct).toMap() } expected.containsAll(actualList) @@ -156,13 +404,22 @@ class Neo4jQueryTaskTest { props[SourceConfiguration.QUERY] = getSourceQuery() props[Neo4jConfiguration.AUTHENTICATION_TYPE] = AuthenticationType.NONE.toString() - val totalRecords = 10 - val expected = insertRecords(totalRecords, true) + val expected = mutableListOf>() + expected.addAll( + insertRecords( + 10, + Clock.fixed(Instant.now().minus(Duration.ofMinutes(5)), ZoneId.systemDefault()), + true)) + expected.addAll( + insertRecords( + 10, + Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault()), + true)) task.start(props) val list = mutableListOf() - await().atMost(60, TimeUnit.SECONDS).until { + await().atMost(30, TimeUnit.SECONDS).until { task.poll()?.let { list.addAll(it) } val actualList = list.map { JSONUtils.readValue>(it.value()) } expected == actualList @@ -181,20 +438,102 @@ class Neo4jQueryTaskTest { props[SourceConfiguration.QUERY] = getSourceQuery() props[Neo4jConfiguration.AUTHENTICATION_TYPE] = AuthenticationType.NONE.toString() - val totalRecords = 10 - val expected = insertRecords(totalRecords) + val expected = mutableListOf>() + expected.addAll( + insertRecords( + 10, Clock.fixed(Instant.now().minus(Duration.ofMinutes(5)), ZoneId.systemDefault()))) + expected.addAll( + insertRecords( + 10, Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault()))) task.start(props) val list = mutableListOf() - await().atMost(60, TimeUnit.SECONDS).until { + await().atMost(30, TimeUnit.SECONDS).until { task.poll()?.let { list.addAll(it) } val actualList = list.map { (it.value() as 
Struct).toMap() } expected == actualList } } - private fun insertRecords(totalRecords: Int, longToInt: Boolean = false) = + @Test + fun `should source data from Neo4j with custom QUERY from USER_PROVIDED`() { + val props = mutableMapOf() + props[Neo4jConfiguration.URI] = neo4j.boltUrl + props[SourceConfiguration.TOPIC] = UUID.randomUUID().toString() + props[SourceConfiguration.QUERY_POLL_INTERVAL] = "10ms" + props[SourceConfiguration.QUERY_STREAMING_PROPERTY] = "timestamp" + props[SourceConfiguration.QUERY] = getSourceQuery() + props[SourceConfiguration.START_FROM] = StartFrom.USER_PROVIDED.toString() + props[SourceConfiguration.START_FROM_VALUE] = + Instant.now().minus(Duration.ofMinutes(7)).toEpochMilli().toString() + props[Neo4jConfiguration.AUTHENTICATION_TYPE] = AuthenticationType.NONE.toString() + + insertRecords( + 10, Clock.fixed(Instant.now().minus(Duration.ofMinutes(10)), ZoneId.systemDefault())) + + val expected = mutableListOf>() + expected.addAll( + insertRecords( + 10, + Clock.fixed(Instant.now().minus(Duration.ofMinutes(5)), ZoneId.systemDefault()), + true)) + expected.addAll( + insertRecords( + 10, + Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault()), + true)) + + task.start(props) + + val list = mutableListOf() + await().atMost(30, TimeUnit.SECONDS).until { + task.poll()?.let { list.addAll(it) } + val actualList = list.map { JSONUtils.readValue>(it.value()) } + expected.containsAll(actualList) + } + } + + @Test + fun `should source data from Neo4j with custom QUERY from USER_PROVIDED with Schema`() { + val props = mutableMapOf() + props[Neo4jConfiguration.URI] = neo4j.boltUrl + props[SourceConfiguration.TOPIC] = UUID.randomUUID().toString() + props[SourceConfiguration.QUERY_POLL_INTERVAL] = "10ms" + props[SourceConfiguration.ENFORCE_SCHEMA] = "true" + props[SourceConfiguration.QUERY_STREAMING_PROPERTY] = "timestamp" + props[SourceConfiguration.QUERY] = getSourceQuery() + props[SourceConfiguration.START_FROM] = 
StartFrom.USER_PROVIDED.toString() + props[SourceConfiguration.START_FROM_VALUE] = + Instant.now().minus(Duration.ofMinutes(7)).toEpochMilli().toString() + props[Neo4jConfiguration.AUTHENTICATION_TYPE] = AuthenticationType.NONE.toString() + + insertRecords( + 10, Clock.fixed(Instant.now().minus(Duration.ofMinutes(10)), ZoneId.systemDefault())) + + val expected = mutableListOf>() + expected.addAll( + insertRecords( + 10, Clock.fixed(Instant.now().minus(Duration.ofMinutes(5)), ZoneId.systemDefault()))) + expected.addAll( + insertRecords( + 10, Clock.fixed(Instant.now().plus(Duration.ofMinutes(5)), ZoneId.systemDefault()))) + + task.start(props) + + val list = mutableListOf() + await().atMost(30, TimeUnit.SECONDS).until { + task.poll()?.let { list.addAll(it) } + val actualList = list.map { (it.value() as Struct).toMap() } + expected.containsAll(actualList) + } + } + + private fun insertRecords( + totalRecords: Int, + clock: Clock = Clock.systemDefaultZone(), + longToInt: Boolean = false + ) = session.beginTransaction().use { tx -> val elements = (1..totalRecords).map { @@ -203,7 +542,7 @@ class Neo4jQueryTaskTest { """ |CREATE (n:Test{ | name: 'Name ' + $it, - | timestamp: timestamp(), + | timestamp: ${'$'}timestamp, | point: point({longitude: 56.7, latitude: 12.78, height: 8}), | array: [1,2,3], | datetime: localdatetime(), @@ -220,7 +559,8 @@ class Neo4jQueryTaskTest { | } AS map, | n AS node """ - .trimMargin()) + .trimMargin(), + mapOf("timestamp" to clock.millis())) val next = result.next() val map = next.asMap().toMutableMap() map["array"] = @@ -368,4 +708,19 @@ class Neo4jQueryTaskTest { actualList.first() == expected } } + + private fun newTaskContextWithOffset(property: String, offset: Long): SourceTaskContext { + return newTaskContextWithOffset(mapOf("property" to property, "value" to offset)) + } + + private fun newTaskContextWithOffset( + offsetMap: Map = emptyMap() + ): SourceTaskContext { + val offsetStorageReader = + mock { + on { 
offset(ArgumentMatchers.anyMap()) } doReturn offsetMap + } + + return mock { on { offsetStorageReader() } doReturn offsetStorageReader } + } } diff --git a/source/src/test/kotlin/streams/kafka/connect/source/Neo4jSourceConnectorTest.kt b/source/src/test/kotlin/streams/kafka/connect/source/Neo4jSourceConnectorTest.kt deleted file mode 100644 index 68aab8be9..000000000 --- a/source/src/test/kotlin/streams/kafka/connect/source/Neo4jSourceConnectorTest.kt +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) "Neo4j" - * Neo4j Sweden AB [http://neo4j.com] - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package streams.kafka.connect.source - -import kotlin.test.assertContains -import kotlin.test.assertEquals -import kotlin.test.assertNotNull -import kotlin.test.assertTrue -import org.junit.jupiter.api.Test -import org.neo4j.connectors.kafka.source.DeprecatedNeo4jSourceConfiguration - -class Neo4jSourceConnectorTest { - - @Test - fun `should not allow cdc as source type during validation`() { - val connector = Neo4jSourceConnector() - val config = connector.validate(mapOf(DeprecatedNeo4jSourceConfiguration.SOURCE_TYPE to "CDC")) - - val entry = - config.configValues().first { it.name() == DeprecatedNeo4jSourceConfiguration.SOURCE_TYPE } - assertNotNull(entry) - assertEquals(listOf("QUERY"), entry.recommendedValues()) - assertTrue(entry.errorMessages().isNotEmpty()) - assertContains( - entry.errorMessages(), - "Invalid value CDC for configuration neo4j.source.type: Must be one of: 'QUERY'.") - } -}