Skip to content

Implement is_empty method #2668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

**Features**

- A new ``Tree.is_empty`` method can be used to determine whether a tree is empty
under either of two definitions of emptiness (:user:`hyanwong`, :pr:`2668`, :issue:`2640`)

- The ``TreeSequence`` object now has the attributes ``min_time`` and ``max_time``,
which are the minimum and maximum among the node times and mutation times,
respectively. (:user:`szhan`, :pr:`2612`, :issue:`2271`)
Expand Down
79 changes: 79 additions & 0 deletions python/tests/test_topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -1764,6 +1764,85 @@ def test_multiple_trees(self):
self.verify(ts, no_root_ts)


class TestEmptyTrees(TopologyTestCase):
    """
    Tests for different sorts of "empty" trees, as reported by
    ``Tree.is_empty`` both with and without ``check_roots``.
    """

    @pytest.mark.parametrize("check_roots", [None, True, False])
    def test_no_nodes(self, check_roots):
        # Nothing is added to the table collection, so the tree has no edges
        # and must be empty whatever the value of check_roots.
        tables = tskit.TableCollection(1)
        tree = tables.tree_sequence().first()
        assert tree.is_empty(check_roots=check_roots)

    @pytest.mark.parametrize("check_roots", [None, True, False])
    @pytest.mark.parametrize("root_threshold", [1, 2, 3])
    def test_normal(self, check_roots, root_threshold):
        tree = tskit.Tree.generate_balanced(2, root_threshold=root_threshold)
        # The tree always has edges, so it is only expected to be empty when
        # roots are checked and root_threshold exceeds the 2 leaves requested.
        if check_roots and root_threshold > 2:
            assert tree.is_empty(check_roots=check_roots)
        else:
            assert not tree.is_empty(check_roots=check_roots)

    @pytest.mark.parametrize("check_roots", [None, True, False])
    @pytest.mark.parametrize("root_threshold", [1, 2])
    def test_stick(self, check_roots, root_threshold):
        ts = tskit.Tree.generate_balanced(2).tree_sequence
        # Keep a single sample plus its unary ancestry.
        stick_tree = ts.simplify([0], keep_unary=True).first(
            root_threshold=root_threshold
        )
        if check_roots and root_threshold > 1:
            assert stick_tree.is_empty(check_roots=check_roots)
        else:
            assert not stick_tree.is_empty(check_roots=check_roots)

    @pytest.mark.parametrize("check_roots", [None, True, False])
    @pytest.mark.parametrize("root_threshold", [1, 2])
    def test_upsidedown_stick(self, check_roots, root_threshold):
        ts = tskit.Tree.generate_balanced(2).tree_sequence
        tables = ts.simplify([0], keep_unary=True).dump_tables()
        # swap flags so that non-sample is dangling off sample
        tables.nodes.flags = 1 - tables.nodes.flags
        upsidedown_stick_tree = tables.tree_sequence().first(
            root_threshold=root_threshold
        )
        if check_roots and root_threshold > 1:
            assert upsidedown_stick_tree.is_empty(check_roots=check_roots)
        else:
            assert not upsidedown_stick_tree.is_empty(check_roots=check_roots)

    @pytest.mark.parametrize("check_roots", [None, True, False])
    @pytest.mark.parametrize("root_threshold", [1, 2])
    def test_multiroot_non_empty(self, check_roots, root_threshold):
        tables = tskit.Tree.generate_balanced(2).tree_sequence.dump_tables()
        # Keep only the first edge row.
        tables.edges.truncate(1)
        multiroot_tree = tables.tree_sequence().first(root_threshold=root_threshold)
        assert multiroot_tree.num_roots == (2 if root_threshold == 1 else 0)
        if check_roots and root_threshold > 1:
            assert multiroot_tree.is_empty(check_roots=check_roots)
        else:
            assert not multiroot_tree.is_empty(check_roots=check_roots)

    # None is included so the default argument value is exercised here as well,
    # matching the other parametrized tests in this class.
    @pytest.mark.parametrize("check_roots", [None, True, False])
    @pytest.mark.parametrize("root_threshold", [1, 2])
    def test_empty(self, check_roots, root_threshold):
        tables = tskit.Tree.generate_balanced(2).tree_sequence.dump_tables()
        tables.delete_intervals([[0, 1]])
        # check that sites & mutations make no difference
        site_id = tables.sites.add_row(position=0.5, ancestral_state="0")
        tables.mutations.add_row(site=site_id, derived_state="1", node=0)
        tree = tables.tree_sequence().first(root_threshold=root_threshold)
        assert tree.is_empty(check_roots=check_roots)

    def test_dead_branch(self):
        tables = tskit.Tree.generate_balanced(2).tree_sequence.dump_tables()
        # Clear the sample flag on every existing node, then add one new
        # sample node that has no edges attached to it.
        tables.nodes.flags = np.zeros_like(tables.nodes.flags)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE)
        tree = tables.tree_sequence().first()
        # Edges are still present, so the tree only counts as empty once the
        # roots are inspected.
        assert not tree.is_empty(check_roots=False)
        assert tree.is_empty(check_roots=True)


class TestEmptyTreeSequences(TopologyTestCase):
"""
Tests covering tree sequences that have zero edges.
Expand Down
29 changes: 29 additions & 0 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,35 @@ def seek(self, position):
raise ValueError("Position out of bounds")
self._ll_tree.seek(position)

def is_empty(self, check_roots=None) -> bool:
    """
    Report whether this tree is "empty", i.e. has no topology.

    A tree with no edges is always empty. A tree that does have edges can
    optionally be treated as empty too when every edge belongs to a
    :ref:`dead branch<sec_data_model_tree_dead_leaves_and_branches>`, that
    is, when no edge is reachable from any of the :meth:`~Tree.roots` of the
    tree. This second check is more involved and is only performed when
    ``check_roots=True`` is specified.

    Emptiness is purely a property of the topology: an "empty" tree can
    still contain sites, and those sites may even carry mutations.

    :param bool check_roots: If true, also consider the tree empty when it
        has topology but that topology is unconnected to any of the roots
        of the tree. Default: ``None``, treated as ``False``.
    :return: ``True`` if this tree is empty, ``False`` otherwise.
    """
    has_edges = self.num_edges != 0
    if not has_edges:
        return True
    if not check_roots:
        # Edges exist and the caller did not ask for the root-based check.
        return False
    # Every root must be inspected: a root being a sample does not rule out
    # it having children, so we need all roots to be childless.
    return all(self.num_children(root) == 0 for root in self.roots)

def rank(self) -> tskit.Rank:
"""
Produce the rank of this tree in the enumeration of all leaf-labelled
Expand Down