Skip to content

DC-aware LBP - better support the deprecated parameters #321

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jun 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions .github/workflows/build-lint-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@ name: Build

on:
push:
branches: [ master ]
branches: [master]
pull_request:
branches: [ master ]
branches: [master]

env:
CARGO_TERM_COLOR: always
# Should include `INTEGRATION_TEST_BIN` from the `Makefile`
# TODO: Remove `build/libscylla-cpp-driver.*` after https://github.com/scylladb/cpp-rust-driver/issues/164 is fixed.
INTEGRATION_TEST_BIN: |
build/cassandra-integration-tests
build/cassandra-integration-tests
build/libscylla-cpp-driver.*
INTEGRATION_TEST_BIN_CACHE_KEY: integration-test-bin-${{ github.sha }}
# Goes to `Makefile` to let it pickup cached binary
Expand All @@ -25,7 +25,7 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Update apt cache
run: sudo apt-get update -y

Expand Down Expand Up @@ -57,7 +57,13 @@ jobs:

strategy:
matrix:
scylla-version: [ENTERPRISE-RELEASE, ENTERPRISE-PRIOR-RELEASE, OSS-RELEASE, OSS-PRIOR-RELEASE, 5.4.8]
scylla-version:
[
ENTERPRISE-RELEASE,
ENTERPRISE-PRIOR-RELEASE,
OSS-RELEASE,
OSS-PRIOR-RELEASE,
]
fail-fast: false

steps:
Expand All @@ -67,7 +73,7 @@ jobs:
- name: Setup Python 3
uses: actions/setup-python@v5
with:
python-version: '3.11'
python-version: "3.11"

- name: Update apt cache
run: sudo apt-get update -y
Expand Down Expand Up @@ -165,13 +171,13 @@ jobs:
uses: actions/setup-java@v4
with:
java-version: ${{ matrix.java-version }}
distribution: 'adopt'
distribution: "adopt"

- name: Setup Python 3
uses: actions/setup-python@v5
with:
python-version: '3.11'
python-version: "3.11"

- name: Update apt cache
run: sudo apt-get update -y

Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ SCYLLA_TEST_FILTER := $(subst ${SPACE},${EMPTY},ClusterTests.*\
:MetricsTests.Integration_Cassandra_ErrorsRequestTimeouts\
:MetricsTests.Integration_Cassandra_Requests\
:MetricsTests.Integration_Cassandra_StatsShardConnections\
:DcAwarePolicyTest.*\
:-PreparedTests.Integration_Cassandra_PreparedIDUnchangedDuringReprepare\
:SchemaMetadataTest.Integration_Cassandra_RegularMetadataNotMarkedVirtual\
:SchemaMetadataTest.Integration_Cassandra_VirtualMetadata\
Expand All @@ -46,7 +47,6 @@ SCYLLA_TEST_FILTER := $(subst ${SPACE},${EMPTY},ClusterTests.*\
:ExecutionProfileTest.Integration_Cassandra_RoundRobin\
:ExecutionProfileTest.Integration_Cassandra_TokenAwareRouting\
:ExecutionProfileTest.Integration_Cassandra_SpeculativeExecutionPolicy\
:DCExecutionProfileTest.Integration_Cassandra_DCAware\
:ControlConnectionTests.Integration_Cassandra_TopologyChange\
:ControlConnectionTests.Integration_Cassandra_FullOutage\
:ControlConnectionTests.Integration_Cassandra_TerminatedUsingMultipleIoThreadsWithError\
Expand Down Expand Up @@ -98,6 +98,7 @@ CASSANDRA_TEST_FILTER := $(subst ${SPACE},${EMPTY},ClusterTests.*\
:MetricsTests.Integration_Cassandra_ErrorsRequestTimeouts\
:MetricsTests.Integration_Cassandra_Requests\
:MetricsTests.Integration_Cassandra_StatsShardConnections\
:DcAwarePolicyTest.*\
:-PreparedTests.Integration_Cassandra_PreparedIDUnchangedDuringReprepare\
:PreparedTests.Integration_Cassandra_FailFastWhenPreparedIDChangesDuringReprepare\
:SchemaMetadataTest.Integration_Cassandra_RegularMetadataNotMarkedVirtual\
Expand All @@ -107,7 +108,6 @@ CASSANDRA_TEST_FILTER := $(subst ${SPACE},${EMPTY},ClusterTests.*\
:ExecutionProfileTest.Integration_Cassandra_RoundRobin\
:ExecutionProfileTest.Integration_Cassandra_TokenAwareRouting\
:ExecutionProfileTest.Integration_Cassandra_SpeculativeExecutionPolicy\
:DCExecutionProfileTest.Integration_Cassandra_DCAware\
:ControlConnectionTests.Integration_Cassandra_TopologyChange\
:ControlConnectionTests.Integration_Cassandra_FullOutage\
:ControlConnectionTests.Integration_Cassandra_TerminatedUsingMultipleIoThreadsWithError\
Expand Down
90 changes: 54 additions & 36 deletions scylla-rust-wrapper/src/cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ use crate::cass_error::CassError;
use crate::cass_types::CassConsistency;
use crate::exec_profile::{CassExecProfile, ExecProfileName, exec_profile_builder_modify};
use crate::future::CassFuture;
use crate::load_balancing::{CassHostFilter, LoadBalancingConfig, LoadBalancingKind};
use crate::load_balancing::{
CassHostFilter, DcRestriction, LoadBalancingConfig, LoadBalancingKind,
};
use crate::retry_policy::CassRetryPolicy;
use crate::ssl::CassSsl;
use crate::timestamp_generator::CassTimestampGen;
Expand Down Expand Up @@ -826,20 +828,44 @@ pub(crate) unsafe fn set_load_balance_dc_aware_n(
used_hosts_per_remote_dc: c_uint,
allow_remote_dcs_for_local_cl: cass_bool_t,
) -> CassError {
if local_dc_raw.is_null() || local_dc_length == 0 {
let Some(local_dc) = (unsafe { ptr_to_cstr_n(local_dc_raw, local_dc_length) }) else {
tracing::error!(
"Provided null or non-UTF-8 local DC name to cass_*_set_load_balance_dc_aware(_n)!"
);
return CassError::CASS_ERROR_LIB_BAD_PARAMS;
}
};

if used_hosts_per_remote_dc != 0 || allow_remote_dcs_for_local_cl != 0 {
// TODO: Add warning that the parameters are deprecated and not supported in the driver.
if local_dc_length == 0 {
tracing::error!("Provided empty local DC name to cass_*_set_load_balance_dc_aware(_n)!");
return CassError::CASS_ERROR_LIB_BAD_PARAMS;
}

let local_dc = unsafe { ptr_to_cstr_n(local_dc_raw, local_dc_length) }
.unwrap()
.to_string();
let permit_dc_failover = if used_hosts_per_remote_dc > 0 {
// TODO: update cassandra.h documentation to reflect this behaviour.
tracing::warn!(
"cass_*_set_load_balance_dc_aware(_n): `used_hosts_per_remote_dc` parameter is only partially \
supported in the driver: `0` is supported correctly, and any value `>0` has the semantics of \"+inf\", \
which means no limit on the number of hosts per remote DC. This is different from the original cpp-driver! \
To clarify, you can understand this parameter as \"permit_dc_failover\", with `0` being `false` and `>0` \
being `true`."
Comment on lines +843 to +850
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand it is difficult to support connection-opening part of this parameter. We could expose cpp_rust_unstable method to allow that.

What is the problem with exposing LBP part of that? You can wrap LBP, and make an iterator that counts returned nodes per dc, and filters out nodes of DCs that reached the limit.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also handle the failover-for-local-cl parameter in wrapper.

Copy link
Collaborator Author

@wprzytula wprzytula Jun 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand it is difficult to support connection-opening part of this parameter. We could expose cpp_rust_unstable method to allow that.

It's not the matter of the Rust driver's API; it's how its connection pooling works inside.
It's even more about the very cpp-driver's mechanism being flawed. See my findings in #315:

After the initial metadata fetch, connection pools are created only for the non-ignored known nodes.

  • NOTE: How I understand the code, connection pool can later be created only for newly added nodes. This means that if all the nodes from a remote DC that we have opened connections to get DOWN, then the driver will not open connections to another node from that DC!

With such flawed semantics AND a large and nontrivial effort that would be needed to do this in the Rust Driver's inner parts, I'm strongly for not supporting <1, +inf) interval in the same way as cpp-driver.

What is the problem with exposing LBP part of that? You can wrap LBP, and make an iterator that counts returned nodes per dc, and filters out nodes of DCs that reached the limit.

Yes, we could do that. But what would be the benefit of this? I see none. The important part is not to open unnecessary connections. With connections already opened, I don't see any benefit from omitting a strict subset of remote targets from the query plan.

If you see any benefit, please share it. But not seeing it, I'm not going to implement this.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With such flawed semantics

Oh I'm not suggesting to follow such weird behaviors. The important part is limiting the connections to N per remote DC - if we have exact semantics more that are more reasonable than cpp-driver, even better.

Ofc there is no need to do this in this PR.

);
true
} else {
false
};

load_balancing_config.load_balancing_kind = Some(LoadBalancingKind::DcAware { local_dc });
let allow_remote_dcs_for_local_cl = allow_remote_dcs_for_local_cl != 0;

load_balancing_config.load_balancing_kind = Some(LoadBalancingKind::DcAware {
local_dc: local_dc.to_owned(),
permit_dc_failover,
allow_remote_dcs_for_local_cl,
});
load_balancing_config.filtering.dc_restriction = if permit_dc_failover {
DcRestriction::None
} else {
DcRestriction::Local(local_dc.to_owned())
};

CassError::CASS_OK
}
Expand Down Expand Up @@ -1757,7 +1783,7 @@ mod tests {
cass_cluster_set_load_balance_dc_aware(
cluster_raw.borrow_mut(),
c"eu".as_ptr(),
0,
0, // forbid DC failover
0
),
CassError::CASS_OK
Expand All @@ -1777,8 +1803,14 @@ mod tests {
let cluster = BoxFFI::as_ref(cluster_raw.borrow()).unwrap();
let load_balancing_kind = &cluster.load_balancing_config.load_balancing_kind;
match load_balancing_kind {
Some(LoadBalancingKind::DcAware { local_dc }) => {
assert_eq!(local_dc, "eu")
Some(LoadBalancingKind::DcAware {
local_dc,
permit_dc_failover,
allow_remote_dcs_for_local_cl,
}) => {
assert_eq!(local_dc, "eu");
assert!(!permit_dc_failover);
assert!(!allow_remote_dcs_for_local_cl);
}
_ => panic!("Expected preferred dc"),
}
Expand Down Expand Up @@ -1814,8 +1846,8 @@ mod tests {
cass_cluster_set_load_balance_dc_aware(
cluster_raw.borrow_mut(),
c"eu".as_ptr(),
0,
0
42, // allow DC failover
cass_true // allow remote DCs for local CL
),
CassError::CASS_OK
);
Expand All @@ -1824,34 +1856,20 @@ mod tests {
let node_location_preference =
&cluster.load_balancing_config.load_balancing_kind;
match node_location_preference {
Some(LoadBalancingKind::DcAware { local_dc }) => {
assert_eq!(local_dc, "eu")
Some(LoadBalancingKind::DcAware {
local_dc,
permit_dc_failover,
allow_remote_dcs_for_local_cl,
}) => {
assert_eq!(local_dc, "eu");
assert!(permit_dc_failover);
assert!(allow_remote_dcs_for_local_cl);
}
_ => panic!("Expected preferred dc"),
}
}
/* Test invalid configurations */
{
// Nonzero deprecated parameters
assert_cass_error_eq!(
cass_cluster_set_load_balance_dc_aware(
cluster_raw.borrow_mut(),
c"eu".as_ptr(),
1,
0
),
CassError::CASS_ERROR_LIB_BAD_PARAMS
);
assert_cass_error_eq!(
cass_cluster_set_load_balance_dc_aware(
cluster_raw.borrow_mut(),
c"eu".as_ptr(),
0,
1
),
CassError::CASS_ERROR_LIB_BAD_PARAMS
);

// null pointers
assert_cass_error_eq!(
cass_cluster_set_load_balance_dc_aware(
Expand Down
34 changes: 9 additions & 25 deletions scylla-rust-wrapper/src/exec_profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -933,7 +933,7 @@ mod tests {
cass_execution_profile_set_load_balance_dc_aware(
profile_raw.borrow_mut(),
c"eu".as_ptr(),
0,
0, // forbid DC failover
0
),
CassError::CASS_OK
Expand All @@ -953,36 +953,20 @@ mod tests {
let profile = BoxFFI::as_ref(profile_raw.borrow()).unwrap();
let load_balancing_kind = &profile.load_balancing_config.load_balancing_kind;
match load_balancing_kind {
Some(LoadBalancingKind::DcAware { local_dc }) => {
assert_eq!(local_dc, "eu")
Some(LoadBalancingKind::DcAware {
local_dc,
permit_dc_failover,
allow_remote_dcs_for_local_cl,
}) => {
assert_eq!(local_dc, "eu");
assert!(!permit_dc_failover);
assert!(!allow_remote_dcs_for_local_cl);
}
_ => panic!("Expected preferred dc"),
}
assert!(!profile.load_balancing_config.token_awareness_enabled);
assert!(profile.load_balancing_config.latency_awareness_enabled);
}
/* Test invalid configurations */
{
// Nonzero deprecated parameters
assert_cass_error_eq!(
cass_execution_profile_set_load_balance_dc_aware(
profile_raw.borrow_mut(),
c"eu".as_ptr(),
1,
0
),
CassError::CASS_ERROR_LIB_BAD_PARAMS
);
assert_cass_error_eq!(
cass_execution_profile_set_load_balance_dc_aware(
profile_raw.borrow_mut(),
c"eu".as_ptr(),
0,
1
),
CassError::CASS_ERROR_LIB_BAD_PARAMS
);
}
}

cass_execution_profile_free(profile_raw);
Expand Down
Loading