Skip to content

Commit 8691991

Browse files
authored
Add MCITree(): Maximum Clade Information tree selection (#191)
Selects the tree from a posterior sample with the highest total split information content — analogous to Maximum Clade Credibility but using phylogenetic or clustering information content. Supports three modes: phylogenetic (default), clustering, and credibility (MCC-equivalent). Uses only existing TreeTools/TreeDist APIs (SplitFrequency, SplitwiseInfo, ClusteringInfo) — no dependency on the unreleased TreeTools .CountSplits branch.
1 parent 51ff632 commit 8691991

File tree

6 files changed

+196
-6
lines changed

6 files changed

+196
-6
lines changed

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ export(LnTreesConsistentWithTwoSplits)
8484
export(Log2TreesConsistentWithTwoSplits)
8585
export(MASTInfo)
8686
export(MASTSize)
87+
export(MCITree)
8788
export(MSTSegments)
8889
export(MapTrees)
8990
export(MappingQuality)
@@ -175,6 +176,7 @@ importFrom(TreeTools,Preorder)
175176
importFrom(TreeTools,RenumberTips)
176177
importFrom(TreeTools,RootOnNode)
177178
importFrom(TreeTools,RootTree)
179+
importFrom(TreeTools,SplitFrequency)
178180
importFrom(TreeTools,SplitInformation)
179181
importFrom(TreeTools,SplitsInBinaryTree)
180182
importFrom(TreeTools,TipLabels)

NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# TreeDist 2.13.0 (2026-03-17)
22

3+
## New features
4+
5+
- `MCITree()` selects the tree from a posterior sample with the highest
6+
total split information content — a Maximum Clade Information analogue
7+
of the Maximum Clade Credibility tree.
8+
39
## Performance
410

511
Pairwise distance computation has been substantially optimized.

R/tree_information.R

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,3 +379,77 @@ ConsensusInfo <- function(trees, info = "phylogenetic", p = 0.5,
379379
}
380380
consensus_info(trees, mode == 1L, p = safeP)
381381
}
382+
383+
#' Maximum Clade Information Tree
#'
#' Analogous to the Maximum Clade Credibility tree:
#' select the tree from a posterior distribution whose clades have the
#' highest information content.
#' Generate the MCC tree by specifying `info = "credibility"`.
#'
#' Each tree in `trees` is scored from the proportion of `trees` in which
#' each of its splits occurs; the highest-scoring tree is returned.
#' If several trees share the best score, the first of them is returned and
#' a message reports the tie.
#'
#' @inheritParams ConsensusInfo
#' @param info Abbreviable character string specifying how each tree is
#'   scored: `"phylogenetic"` (alias `"spic"`) uses [`SplitwiseInfo()`];
#'   `"clustering"` (alias `"scic"`) uses [`ClusteringInfo()`];
#'   `"credibility"` uses the sum of the split frequencies.
#' @return `MCITree()` returns the tree with the highest information
#' content, selected from `trees`.
#' If `trees` is a single `phylo` object, or contains a single tree, that
#' tree is returned unchanged.
#' An error is raised if `info` is not recognized or `trees` is empty.
#' @examples
#' library("TreeTools", quietly = TRUE)
#' trees <- as.phylo(24:40, 16)
#'
#' # Maximum Clade Information tree
#' mci <- MCITree(trees)
#' SplitwiseInfo(mci)
#' plot(mci)
#' p <- SplitFrequency(mci, trees) / length(trees)
#' LabelSplits(mci, round(p * 100), "%", bg = SupportColor(p))
#'
#' \donttest{
#' # Compare with Maximum Clade Credibility tree
#' mcc <- MCITree(trees, "credibility")
#' plot(mcc)
#' p <- SplitFrequency(mcc, trees) / length(trees)
#' LabelSplits(mcc, round(p * 100), "%", bg = SupportColor(p))
#' SplitwiseInfo(mcc)
#' }
#'
#' @template MRS
#' @importFrom TreeTools as.Splits RenumberTips SplitFrequency
#' @export
MCITree <- function(trees, info = "phylogenetic", check.tips = TRUE) {
  infoTypes <- c("phylogenetic", "clustering", "credibility", "spic", "scic")
  # Only a single string can be matched; anything else (NULL, a vector)
  # is routed to the same informative error as an unrecognized name.
  mode <- if (length(info) == 1L) {
    pmatch(tolower(info), infoTypes)
  } else {
    NA_integer_
  }
  if (is.na(mode)) {
    stop("`info` must be \"phylogenetic\", \"clustering\" or \"credibility\"")
  }

  # A lone tree is trivially the best tree
  if (inherits(trees, "phylo")) {
    return(trees)
  }
  nTree <- length(trees)
  if (nTree == 0L) {
    stop("`trees` must contain at least one tree")
  }
  if (nTree == 1L) {
    return(trees[[1]])
  }

  if (check.tips) {
    # Number tips consistently so that splits are comparable between trees
    trees <- RenumberTips(trees, trees[[1]])
  }
  treeSplits <- as.Splits(trees)

  # "spic" and "scic" are aliases of the first two scoring modes
  InfoFn <- switch(c(1L, 2L, 3L, 1L, 2L)[[mode]],
    function(sp, p) SplitwiseInfo(sp, p, sum = TRUE),
    function(sp, p) ClusteringInfo(sp, p, sum = TRUE),
    # NOTE(review): scores by the sum of split frequencies; conventional MCC
    # maximizes their product -- confirm that the sum is intended.
    function(sp, p) sum(p)
  )

  # Score each tree using the proportion of trees containing each split
  treeInfo <- vapply(treeSplits, function(sp) {
    p <- SplitFrequency(sp, trees) / nTree
    InfoFn(sp, p)
  }, double(1))

  chosen <- which.max(treeInfo)
  # which.max() skips missing scores; count ties on the same basis
  nBest <- sum(treeInfo == treeInfo[[chosen]], na.rm = TRUE)
  if (nBest > 1L) {
    message(nBest, " trees tied for best score; returning tree ", chosen)
  }

  trees[[chosen]]
}

inst/WORDLIST

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@ JV
2323
Jaccard
2424
Kaski
2525
Kawa
26+
KendallColijn
2627
LAPJV
2728
LSAP
2829
Leeuwen
30+
MCC
2931
MDS
3032
MKL
3133
MacKay
@@ -37,29 +39,28 @@ Meila
3739
Mol
3840
Munkres
3941
MutualClusteringInfo
40-
Nura
4142
NNI
4243
NNIDist
44+
Nura
4345
NyeTreeSimilarity
4446
OEIS
4547
ORCID
4648
OpenMP
49+
POPCNT
4750
Perotti
4851
PhysRevE
4952
PlotTools
50-
POPCNT
5153
R's
52-
Rcpp
5354
RStudio
5455
Rcpp
5556
RdMacros
5657
RdPack
5758
Regraft
59+
SPI
60+
SPR
5861
Sammon
5962
Sammon's
6063
Soneson's
61-
SPR
62-
SPI
6364
Stamatakis
6465
TBR
6566
TBRDist
@@ -78,6 +79,7 @@ Yi
7879
ac
7980
aways
8081
bipartitions
82+
branchless
8183
btaa
8284
bti
8385
codecov
@@ -98,8 +100,8 @@ hypervolumes
98100
ingroup
99101
interdecile
100102
jonker
101-
magiclogic
102103
macOS
104+
magiclogic
103105
mergesort
104106
molbev
105107
msw
@@ -116,6 +118,8 @@ roy
116118
scic
117119
sensu
118120
shinyjs
121+
speedup
122+
speedups
119123
splitwise
120124
syab
121125
sysbio

man/MCITree.Rd

Lines changed: 52 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-MCITree.R

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
test_that("MCITree returns one of the input trees", {
  trees <- as.phylo(24:40, 16)
  mci <- MCITree(trees)
  # isTRUE() reduces each comparison to a single logical, even if
  # all.equal() were to describe the differences instead of returning FALSE
  isInput <- vapply(
    trees,
    function(tr) isTRUE(all.equal(tr, mci)),
    logical(1)
  )
  expect_true(any(isInput))
})
6+
7+
test_that("MCITree modes give valid results", {
  trees <- as.phylo(24:40, 16)
  # Each named mode should yield a single tree
  for (infoType in c("phylogenetic", "clustering", "credibility")) {
    expect_s3_class(MCITree(trees, infoType), "phylo")
  }
})
14+
15+
test_that("MCITree aliases match base modes", {
  trees <- as.phylo(24:40, 16)
  # Names are the aliases; values are the modes they stand for
  aliasOf <- c(spic = "phylogenetic", scic = "clustering")
  for (alias in names(aliasOf)) {
    expect_equal(MCITree(trees, alias), MCITree(trees, aliasOf[[alias]]))
  }
})
20+
21+
test_that("MCITree handles edge cases", {
  # A lone `phylo` object comes back untouched
  pectinate <- PectinateTree(8)
  expect_identical(MCITree(pectinate), pectinate)

  # A one-tree subset of a forest yields its only member
  forest <- c(PectinateTree(8), BalancedTree(8))
  onlyFirst <- forest[1]
  expect_identical(MCITree(onlyFirst), forest[[1]])
})
28+
29+
test_that("MCITree rejects invalid info", {
  # An unrecognized `info` string must raise the "must be ..." error
  forest <- as.phylo(24:40, 16)
  expect_error(MCITree(forest, "invalid"), "must be")
})
33+
34+
test_that("MCITree credibility selects highest-support tree", {
  # Ten copies of one tree and a single copy of another: every split of the
  # majority tree occurs in at least 10 of the 11 trees, so the credibility
  # tree should be one of the majority copies.
  majority <- PectinateTree(8)
  minority <- BalancedTree(8)
  forest <- structure(
    c(rep(list(majority), 10), list(minority)),
    class = "multiPhylo"
  )
  expect_equal(MCITree(forest, "credibility"), majority)
})
45+
46+
test_that("MCITree messages on ties", {
  # Five copies of the same tree necessarily share the top score
  clones <- structure(rep(list(PectinateTree(8)), 5), class = "multiPhylo")
  expect_message(MCITree(clones), "tied")
})

0 commit comments

Comments
 (0)