Skip to content

Commit abcc4e8

Browse files
committed
Implement efficient initialization of null trees.
1 parent ec992da commit abcc4e8

File tree

1 file changed

+104
-1
lines changed

1 file changed

+104
-1
lines changed

c/tskit/trees.c

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4551,8 +4551,103 @@ tsk_tree_position_in_interval(const tsk_tree_t *self, double x)
45514551
return self->interval.left <= x && x < self->interval.right;
45524552
}
45534553

4554+
static int
4555+
tsk_tree_seek_from_null(tsk_tree_t *self, double x, tsk_flags_t TSK_UNUSED(options))
4556+
{
4557+
int ret = 0;
4558+
tsk_size_t edge, num_edges = self->tree_sequence->tables->edges.num_rows,
4559+
num_trees = self->tree_sequence->num_trees;
4560+
tsk_id_t p, c, e, j, k, tree_index;
4561+
const double L = tsk_treeseq_get_sequence_length(self->tree_sequence);
4562+
const tsk_id_t *restrict edge_parent = self->tree_sequence->tables->edges.parent;
4563+
const tsk_id_t *restrict edge_child = self->tree_sequence->tables->edges.child;
4564+
const double *restrict edge_left = self->tree_sequence->tables->edges.left;
4565+
const double *restrict edge_right = self->tree_sequence->tables->edges.right;
4566+
const double *restrict breakpoints = self->tree_sequence->breakpoints;
4567+
const tsk_id_t *restrict insertion
4568+
= self->tree_sequence->tables->indexes.edge_insertion_order;
4569+
const tsk_id_t *restrict removal
4570+
= self->tree_sequence->tables->indexes.edge_removal_order;
4571+
4572+
/* NOTE: this differ's from JK's proposal by not using
4573+
* edge diffs explicitly. However:
4574+
* 1. It benchmarked faster (in rust).
4575+
* 2. It is a lot less code and doesn't have
4576+
* to do "edge diffs, but backwards".
4577+
*/
4578+
4579+
// NOTE: it may be better to get the
4580+
// index first and then ask if we are
4581+
// searching in the first or last 1/2
4582+
// of trees.
4583+
j = -1;
4584+
if (x <= L / 2.0) {
4585+
for (edge = 0; edge < num_edges; ++edge) {
4586+
e = insertion[edge];
4587+
if (edge_left[e] > x) {
4588+
j = (tsk_id_t) edge;
4589+
break;
4590+
}
4591+
if (x >= edge_left[e] && x < edge_right[e]) {
4592+
p = edge_parent[e];
4593+
c = edge_child[e];
4594+
tsk_tree_insert_edge(self, p, c, e);
4595+
}
4596+
}
4597+
} else {
4598+
for (edge = 0; edge < num_edges; ++edge) {
4599+
e = removal[num_edges - edge - 1];
4600+
if (edge_right[e] < x) {
4601+
j = (tsk_id_t)(num_edges - edge - 1);
4602+
while (j < (tsk_id_t) num_edges && edge_left[insertion[j]] <= x) {
4603+
j += 1;
4604+
}
4605+
break;
4606+
}
4607+
if (x >= edge_left[e] && x < edge_right[e]) {
4608+
p = edge_parent[e];
4609+
c = edge_child[e];
4610+
tsk_tree_insert_edge(self, p, c, e);
4611+
}
4612+
}
4613+
}
4614+
4615+
if (j == -1) {
4616+
j = 0;
4617+
while (j < (tsk_id_t) num_edges && edge_left[insertion[j]] <= x) {
4618+
j += 1;
4619+
}
4620+
}
4621+
k = 0;
4622+
while (k < (tsk_id_t) num_edges && edge_right[removal[k]] <= x) {
4623+
k += 1;
4624+
}
4625+
4626+
/* NOTE: tsk_search_sorted finds the first the first
4627+
* insertion locatiom >= the query point, which
4628+
* finds a RIGHT value for queries not at the left edge.
4629+
*/
4630+
tree_index = (tsk_id_t) tsk_search_sorted(breakpoints, num_trees + 1, x);
4631+
if (breakpoints[tree_index] > x) {
4632+
tree_index -= 1;
4633+
}
4634+
self->index = tree_index;
4635+
self->interval.left = breakpoints[tree_index];
4636+
self->interval.right = breakpoints[tree_index + 1];
4637+
self->left_index = j;
4638+
self->right_index = k;
4639+
self->direction = TSK_DIR_FORWARD;
4640+
self->num_nodes = self->tree_sequence->tables->nodes.num_rows;
4641+
if (self->tree_sequence->tables->sites.num_rows > 0) {
4642+
self->sites = self->tree_sequence->tree_sites[self->index];
4643+
self->sites_length = self->tree_sequence->tree_sites_length[self->index];
4644+
}
4645+
4646+
return ret;
4647+
}
4648+
45544649
int TSK_WARN_UNUSED
4555-
tsk_tree_seek(tsk_tree_t *self, double x, tsk_flags_t TSK_UNUSED(options))
4650+
tsk_tree_seek(tsk_tree_t *self, double x, tsk_flags_t options)
45564651
{
45574652
int ret = 0;
45584653
const double L = tsk_treeseq_get_sequence_length(self->tree_sequence);
@@ -4565,6 +4660,13 @@ tsk_tree_seek(tsk_tree_t *self, double x, tsk_flags_t TSK_UNUSED(options))
45654660
goto out;
45664661
}
45674662

4663+
if (self->index == -1) {
4664+
ret = tsk_tree_seek_from_null(self, x, options);
4665+
if (ret < 0) {
4666+
goto out;
4667+
}
4668+
}
4669+
45684670
if (x < t_l) {
45694671
/* |-----|-----|========|---------| */
45704672
/* 0 x t_l t_r L */
@@ -4592,6 +4694,7 @@ tsk_tree_seek(tsk_tree_t *self, double x, tsk_flags_t TSK_UNUSED(options))
45924694
}
45934695
}
45944696
ret = 0;
4697+
45954698
out:
45964699
return ret;
45974700
}

0 commit comments

Comments
 (0)