Skip to content

Commit 8d7f116

Browse files
committed
Fix LAPJV alignment regression + coverage for mutual_clustering_impl.h
lap_impl.h: add __attribute__((optimize("align-functions=64", "align-loops=16"))) on lap() (GCC only) to stabilise instruction alignment across TU layout changes. cost_matrix.h: add setWithTranspose() and markTransposed() methods. lap.cpp: use combined fill+transpose in lapjv() to eliminate the extra makeTranspose() O(n²) pass introduced by the header refactor. tree_distance_functions.cpp: add cpp_mci_impl_score() wrapper that calls TreeDist::mutual_clustering_score() from the installable header, covering find_exact_matches_raw() and the MCI score computation. test-mci_impl.R: exercises exact-match early exit and partial-match + LAP paths through the header implementation.
1 parent 2662792 commit 8d7f116

File tree

7 files changed

+135
-4
lines changed

7 files changed

+135
-4
lines changed

R/RcppExports.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,10 @@ cpp_transfer_dist_cross_pairs <- function(splits_a, splits_b, n_tip, scale, n_th
267267
.Call(`_TreeDist_cpp_transfer_dist_cross_pairs`, splits_a, splits_b, n_tip, scale, n_threads)
268268
}
269269

270+
cpp_mci_impl_score <- function(x, y, n_tips) {
271+
.Call(`_TreeDist_cpp_mci_impl_score`, x, y, n_tips)
272+
}
273+
270274
cpp_robinson_foulds_distance <- function(x, y, nTip) {
271275
.Call(`_TreeDist_cpp_robinson_foulds_distance`, x, y, nTip)
272276
}

inst/include/TreeDist/cost_matrix.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,16 @@ class CostMatrix {
9090
return &t_data_[static_cast<std::size_t>(i) * dim8_];
9191
}
9292

93+
// Write a value to both the data and transposed-data arrays.
94+
// After filling the entire matrix this way, call markTransposed()
95+
// to skip the lazy transpose in makeTranspose().
96+
void setWithTranspose(lap_row i, lap_col j, cost value) {
97+
data_[static_cast<std::size_t>(i) * dim8_ + j] = value;
98+
t_data_[static_cast<std::size_t>(j) * dim8_ + i] = value;
99+
}
100+
101+
void markTransposed() noexcept { transposed_ = true; }
102+
93103
// ---- Transpose ----
94104

95105
void makeTranspose() noexcept {

inst/include/TreeDist/lap_impl.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,13 @@ namespace detail {
3232
}
3333
} // namespace detail
3434

35+
// The LAP hot loops are sensitive to instruction alignment — even unrelated
36+
// changes to other code in the same translation unit can shift layout enough
37+
// to cause measurable regressions. Force 64-byte function alignment and
38+
// 16-byte loop alignment to stabilise performance across builds.
39+
#if defined(__GNUC__) && !defined(__clang__)
40+
__attribute__((optimize("align-functions=64", "align-loops=16")))
41+
#endif
3542
cost lap(const lap_row dim,
3643
CostMatrix& input_cost,
3744
std::vector<lap_col>& rowsol,

src/RcppExports.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,19 @@ BEGIN_RCPP
611611
return rcpp_result_gen;
612612
END_RCPP
613613
}
614+
// cpp_mci_impl_score
615+
double cpp_mci_impl_score(const Rcpp::RawMatrix& x, const Rcpp::RawMatrix& y, int n_tips);
616+
RcppExport SEXP _TreeDist_cpp_mci_impl_score(SEXP xSEXP, SEXP ySEXP, SEXP n_tipsSEXP) {
617+
BEGIN_RCPP
618+
Rcpp::RObject rcpp_result_gen;
619+
Rcpp::RNGScope rcpp_rngScope_gen;
620+
Rcpp::traits::input_parameter< const Rcpp::RawMatrix& >::type x(xSEXP);
621+
Rcpp::traits::input_parameter< const Rcpp::RawMatrix& >::type y(ySEXP);
622+
Rcpp::traits::input_parameter< int >::type n_tips(n_tipsSEXP);
623+
rcpp_result_gen = Rcpp::wrap(cpp_mci_impl_score(x, y, n_tips));
624+
return rcpp_result_gen;
625+
END_RCPP
626+
}
614627
// cpp_robinson_foulds_distance
615628
List cpp_robinson_foulds_distance(const RawMatrix& x, const RawMatrix& y, const IntegerVector& nTip);
616629
RcppExport SEXP _TreeDist_cpp_robinson_foulds_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) {
@@ -753,6 +766,7 @@ static const R_CallMethodDef CallEntries[] = {
753766
{"_TreeDist_cpp_transfer_dist_scored", (DL_FUNC) &_TreeDist_cpp_transfer_dist_scored, 4},
754767
{"_TreeDist_cpp_transfer_dist_all_pairs", (DL_FUNC) &_TreeDist_cpp_transfer_dist_all_pairs, 4},
755768
{"_TreeDist_cpp_transfer_dist_cross_pairs", (DL_FUNC) &_TreeDist_cpp_transfer_dist_cross_pairs, 5},
769+
{"_TreeDist_cpp_mci_impl_score", (DL_FUNC) &_TreeDist_cpp_mci_impl_score, 3},
756770
{"_TreeDist_cpp_robinson_foulds_distance", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_distance, 3},
757771
{"_TreeDist_cpp_robinson_foulds_info", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_info, 3},
758772
{"_TreeDist_cpp_matching_split_distance", (DL_FUNC) &_TreeDist_cpp_matching_split_distance, 3},

src/lap.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,25 @@ Rcpp::List lapjv(Rcpp::NumericMatrix &x, Rcpp::NumericVector &maxX) {
5050
std::vector<lap_row> colsol(max_dim);
5151

5252
// Build cost matrix from R's column-major NumericMatrix.
53+
// Fill data and transpose simultaneously to avoid a separate
54+
// makeTranspose() pass in lap().
5355
cost_matrix input(max_dim);
5456
const double* __restrict__ src_data = REAL(x);
5557
for (lap_row r = 0; r < n_row; ++r) {
5658
for (lap_col c = 0; c < n_col; ++c) {
57-
input(r, c) = static_cast<cost>(
58-
src_data[static_cast<std::size_t>(c) * n_row + r] * scale_factor);
59+
input.setWithTranspose(r, c, static_cast<cost>(
60+
src_data[static_cast<std::size_t>(c) * n_row + r] * scale_factor));
61+
}
62+
for (lap_col c = n_col; c < max_dim; ++c) {
63+
input.setWithTranspose(r, c, max_score);
64+
}
65+
}
66+
for (lap_row r = n_row; r < max_dim; ++r) {
67+
for (lap_col c = 0; c < max_dim; ++c) {
68+
input.setWithTranspose(r, c, max_score);
5969
}
60-
input.padRowAfterCol(r, n_col, max_score);
6170
}
62-
input.padAfterRow(n_row, max_score);
71+
input.markTransposed();
6372

6473
cost score = lap(max_dim, input, rowsol, colsol);
6574

src/tree_distance_functions.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <TreeTools/SplitList.h>
12
#include <Rcpp/Lightest>
23

34
// Provide the MCI table definitions and implementation in this TU.
@@ -11,3 +12,36 @@ __attribute__((constructor))
1112
void initialize_ldf() {
1213
TreeDist::init_lg2_tables(SL_MAX_TIPS);
1314
}
15+
16+
// Thin wrapper that exercises the installable-header version of
17+
// mutual_clustering_score(), for test coverage and downstream validation.
18+
// [[Rcpp::export]]
19+
double cpp_mci_impl_score(const Rcpp::RawMatrix& x,
20+
const Rcpp::RawMatrix& y,
21+
int n_tips) {
22+
using TreeTools::SplitList;
23+
24+
const SplitList a(x);
25+
const SplitList b(y);
26+
TreeDist::LapScratch scratch;
27+
28+
// Build arrays matching the header's raw-pointer API types.
29+
std::vector<const splitbit*> a_ptrs(a.n_splits);
30+
std::vector<const splitbit*> b_ptrs(b.n_splits);
31+
std::vector<TreeDist::int16> a_in(a.n_splits);
32+
std::vector<TreeDist::int16> b_in(b.n_splits);
33+
for (TreeDist::int16 i = 0; i < a.n_splits; ++i) {
34+
a_ptrs[i] = a.state[i];
35+
a_in[i] = static_cast<TreeDist::int16>(a.in_split[i]);
36+
}
37+
for (TreeDist::int16 i = 0; i < b.n_splits; ++i) {
38+
b_ptrs[i] = b.state[i];
39+
b_in[i] = static_cast<TreeDist::int16>(b.in_split[i]);
40+
}
41+
42+
return TreeDist::mutual_clustering_score(
43+
a_ptrs.data(), a_in.data(), a.n_splits,
44+
b_ptrs.data(), b_in.data(), b.n_splits,
45+
a.n_bins, static_cast<TreeDist::int32>(n_tips),
46+
scratch);
47+
}

tests/testthat/test-mci_impl.R

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
test_that("Header mutual_clustering_score matches MutualClusteringInfo", {
2+
skip_if_not_installed("TreeDist")
3+
library(TreeTools)
4+
library(TreeDist)
5+
6+
bal8 <- BalancedTree(8)
7+
pec8 <- PectinateTree(8)
8+
star8 <- StarTree(8)
9+
10+
tips <- TipLabels(bal8)
11+
n_tip <- length(tips)
12+
splits_bal <- as.Splits(bal8, tips)
13+
splits_pec <- as.Splits(pec8, tips)
14+
splits_star <- as.Splits(star8, tips)
15+
16+
# Score-only from the installable-header implementation
17+
impl_score <- TreeDist:::cpp_mci_impl_score
18+
19+
impl_bal_pec <- impl_score(splits_bal, splits_pec, n_tip)
20+
impl_bal_bal <- impl_score(splits_bal, splits_bal, n_tip)
21+
impl_star <- impl_score(splits_bal, splits_star, n_tip)
22+
23+
# Reference from MutualClusteringInfo (unnormalized score)
24+
ref_bal_pec <- MutualClusteringInfo(bal8, pec8)
25+
ref_bal_bal <- MutualClusteringInfo(bal8, bal8)
26+
27+
expect_equal(impl_bal_pec, ref_bal_pec, tolerance = 1e-10)
28+
expect_equal(impl_bal_bal, ref_bal_bal, tolerance = 1e-10)
29+
expect_equal(impl_star, 0)
30+
})
31+
32+
test_that("Header MCI covers exact-match early exit and partial LAP", {
33+
skip_if_not_installed("TreeDist")
34+
library(TreeTools)
35+
library(TreeDist)
36+
impl_score <- TreeDist:::cpp_mci_impl_score
37+
38+
# Two identical trees → all splits match exactly (early exit path)
39+
bal20 <- BalancedTree(20)
40+
tips <- TipLabels(bal20)
41+
n_tip <- length(tips)
42+
splits20 <- as.Splits(bal20, tips)
43+
44+
result <- impl_score(splits20, splits20, n_tip)
45+
expect_equal(result, MutualClusteringInfo(bal20, bal20), tolerance = 1e-10)
46+
47+
# Trees that share some but not all splits → partial match + LAP
48+
pec20 <- PectinateTree(20)
49+
splits_pec20 <- as.Splits(pec20, tips)
50+
51+
result2 <- impl_score(splits20, splits_pec20, n_tip)
52+
expect_equal(result2, MutualClusteringInfo(bal20, pec20), tolerance = 1e-10)
53+
})

0 commit comments

Comments
 (0)