-
Notifications
You must be signed in to change notification settings - Fork 140
More commit-graph/Bloom filter improvements #659
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5700204
6b63f9b
3c532eb
f1e3a85
c079921
5ed0ce2
b982c9b
af750d8
a95de3c
9c4a00a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -186,24 +186,22 @@ struct bloom_filter *get_bloom_filter(struct repository *r, | |
struct diff_options diffopt; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, René Scharfe wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, SZEDER Gábor wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Derrick Stolee wrote (reply to this):
|
||
int max_changes = 512; | ||
|
||
if (bloom_filters.slab_size == 0) | ||
if (!bloom_filters.slab_size) | ||
return NULL; | ||
|
||
filter = bloom_filter_slab_at(&bloom_filters, c); | ||
|
||
if (!filter->data) { | ||
load_commit_graph_info(r, c); | ||
if (c->graph_pos != COMMIT_NOT_FROM_GRAPH && | ||
r->objects->commit_graph->chunk_bloom_indexes) { | ||
if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c)) | ||
return filter; | ||
else | ||
return NULL; | ||
} | ||
r->objects->commit_graph->chunk_bloom_indexes) | ||
load_bloom_filter_from_graph(r->objects->commit_graph, filter, c); | ||
} | ||
|
||
if (filter->data || !compute_if_not_present) | ||
if (filter->data) | ||
return filter; | ||
if (!compute_if_not_present) | ||
return NULL; | ||
|
||
repo_diff_setup(r, &diffopt); | ||
diffopt.flags.recursive = 1; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,8 @@ | |
#include "progress.h" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, René Scharfe wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, René Scharfe wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Derrick Stolee wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, René Scharfe wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Derrick Stolee wrote (reply to this):
|
||
#include "bloom.h" | ||
#include "commit-slab.h" | ||
#include "json-writer.h" | ||
#include "trace2.h" | ||
|
||
void git_test_write_commit_graph_or_die(void) | ||
{ | ||
|
@@ -564,10 +566,6 @@ static int prepare_commit_graph(struct repository *r) | |
return !!r->objects->commit_graph; | ||
r->objects->commit_graph_attempted = 1; | ||
|
||
if (git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD, 0)) | ||
die("dying as requested by the '%s' variable on commit-graph load!", | ||
GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD); | ||
|
||
prepare_repo_settings(r); | ||
|
||
if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && | ||
|
@@ -790,6 +788,14 @@ static int parse_commit_in_graph_one(struct repository *r, | |
|
||
int parse_commit_in_graph(struct repository *r, struct commit *item) | ||
{ | ||
static int checked_env = 0; | ||
|
||
if (!checked_env && | ||
git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0)) | ||
die("dying as requested by the '%s' variable on commit-graph parse!", | ||
GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE); | ||
checked_env = 1; | ||
|
||
if (!prepare_commit_graph(r)) | ||
return 0; | ||
return parse_commit_in_graph_one(r, r->objects->commit_graph, item); | ||
|
@@ -882,10 +888,11 @@ struct write_commit_graph_context { | |
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, René Scharfe wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Derrick Stolee wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, René Scharfe wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, René Scharfe wrote (reply to this):
|
||
const struct split_commit_graph_opts *split_opts; | ||
size_t total_bloom_filter_data_size; | ||
const struct bloom_filter_settings *bloom_settings; | ||
}; | ||
|
||
static void write_graph_chunk_fanout(struct hashfile *f, | ||
struct write_commit_graph_context *ctx) | ||
static int write_graph_chunk_fanout(struct hashfile *f, | ||
struct write_commit_graph_context *ctx) | ||
{ | ||
int i, count = 0; | ||
struct commit **list = ctx->commits.list; | ||
|
@@ -906,17 +913,21 @@ static void write_graph_chunk_fanout(struct hashfile *f, | |
|
||
hashwrite_be32(f, count); | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
static void write_graph_chunk_oids(struct hashfile *f, int hash_len, | ||
struct write_commit_graph_context *ctx) | ||
static int write_graph_chunk_oids(struct hashfile *f, | ||
struct write_commit_graph_context *ctx) | ||
{ | ||
struct commit **list = ctx->commits.list; | ||
int count; | ||
for (count = 0; count < ctx->commits.nr; count++, list++) { | ||
display_progress(ctx->progress, ++ctx->progress_cnt); | ||
hashwrite(f, (*list)->object.oid.hash, (int)hash_len); | ||
hashwrite(f, (*list)->object.oid.hash, the_hash_algo->rawsz); | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
static const unsigned char *commit_to_sha1(size_t index, void *table) | ||
|
@@ -925,8 +936,8 @@ static const unsigned char *commit_to_sha1(size_t index, void *table) | |
return commits[index]->object.oid.hash; | ||
} | ||
|
||
static void write_graph_chunk_data(struct hashfile *f, int hash_len, | ||
struct write_commit_graph_context *ctx) | ||
static int write_graph_chunk_data(struct hashfile *f, | ||
struct write_commit_graph_context *ctx) | ||
{ | ||
struct commit **list = ctx->commits.list; | ||
struct commit **last = ctx->commits.list + ctx->commits.nr; | ||
|
@@ -943,7 +954,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, | |
die(_("unable to parse commit %s"), | ||
oid_to_hex(&(*list)->object.oid)); | ||
tree = get_commit_tree_oid(*list); | ||
hashwrite(f, tree->hash, hash_len); | ||
hashwrite(f, tree->hash, the_hash_algo->rawsz); | ||
|
||
parent = (*list)->parents; | ||
|
||
|
@@ -1023,10 +1034,12 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, | |
|
||
list++; | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
static void write_graph_chunk_extra_edges(struct hashfile *f, | ||
struct write_commit_graph_context *ctx) | ||
static int write_graph_chunk_extra_edges(struct hashfile *f, | ||
struct write_commit_graph_context *ctx) | ||
{ | ||
struct commit **list = ctx->commits.list; | ||
struct commit **last = ctx->commits.list + ctx->commits.nr; | ||
|
@@ -1075,10 +1088,12 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, | |
|
||
list++; | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
static void write_graph_chunk_bloom_indexes(struct hashfile *f, | ||
struct write_commit_graph_context *ctx) | ||
static int write_graph_chunk_bloom_indexes(struct hashfile *f, | ||
struct write_commit_graph_context *ctx) | ||
{ | ||
struct commit **list = ctx->commits.list; | ||
struct commit **last = ctx->commits.list + ctx->commits.nr; | ||
|
@@ -1093,41 +1108,63 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f, | |
|
||
while (list < last) { | ||
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); | ||
cur_pos += filter->len; | ||
size_t len = filter ? filter->len : 0; | ||
cur_pos += len; | ||
display_progress(progress, ++i); | ||
hashwrite_be32(f, cur_pos); | ||
list++; | ||
} | ||
|
||
stop_progress(&progress); | ||
return 0; | ||
} | ||
|
||
/*
 * Report the Bloom filter settings in use through trace2.
 *
 * Builds a JSON object with the "hash_version", "num_hashes" and
 * "bits_per_entry" values read from ctx->bloom_settings and hands it to
 * trace2_data_json() together with the strings "bloom" and "settings"
 * and the repository ctx->r.
 *
 * ctx->bloom_settings is dereferenced unconditionally, so it must be
 * non-NULL when this is called.
 */
static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx) | ||
{ | ||
struct json_writer jw = JSON_WRITER_INIT; | ||
|
||
jw_object_begin(&jw, 0); | ||
jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version); | ||
jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes); | ||
jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry); | ||
jw_end(&jw); | ||
|
||
trace2_data_json("bloom", ctx->r, "settings", &jw); | ||
|
||
/* The writer is released once the event has been emitted. */
jw_release(&jw); | ||
} | ||
|
||
static void write_graph_chunk_bloom_data(struct hashfile *f, | ||
struct write_commit_graph_context *ctx, | ||
const struct bloom_filter_settings *settings) | ||
static int write_graph_chunk_bloom_data(struct hashfile *f, | ||
struct write_commit_graph_context *ctx) | ||
{ | ||
struct commit **list = ctx->commits.list; | ||
struct commit **last = ctx->commits.list + ctx->commits.nr; | ||
struct progress *progress = NULL; | ||
int i = 0; | ||
|
||
trace2_bloom_filter_settings(ctx); | ||
|
||
if (ctx->report_progress) | ||
progress = start_delayed_progress( | ||
_("Writing changed paths Bloom filters data"), | ||
ctx->commits.nr); | ||
|
||
hashwrite_be32(f, settings->hash_version); | ||
hashwrite_be32(f, settings->num_hashes); | ||
hashwrite_be32(f, settings->bits_per_entry); | ||
hashwrite_be32(f, ctx->bloom_settings->hash_version); | ||
hashwrite_be32(f, ctx->bloom_settings->num_hashes); | ||
hashwrite_be32(f, ctx->bloom_settings->bits_per_entry); | ||
|
||
while (list < last) { | ||
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); | ||
size_t len = filter ? filter->len : 0; | ||
display_progress(progress, ++i); | ||
hashwrite(f, filter->data, filter->len * sizeof(unsigned char)); | ||
|
||
if (len) | ||
hashwrite(f, filter->data, len * sizeof(unsigned char)); | ||
list++; | ||
} | ||
|
||
stop_progress(&progress); | ||
return 0; | ||
} | ||
|
||
static int oid_compare(const void *_a, const void *_b) | ||
|
@@ -1522,9 +1559,13 @@ static int write_graph_chunk_base(struct hashfile *f, | |
return 0; | ||
} | ||
|
||
/*
 * Common signature for the per-chunk writer functions: each takes the
 * output hashfile and the write context and returns an int.
 * NOTE(review): the visible caller treats a non-zero return as failure;
 * confirm against the full file.
 */
typedef int (*chunk_write_fn)(struct hashfile *f, | ||
struct write_commit_graph_context *ctx); | ||
|
||
/*
 * Describes one chunk of the commit-graph file being written.
 */
struct chunk_info { | ||
/* Chunk identifier (one of the GRAPH_CHUNKID_* values). */
uint32_t id; | ||
/* Size of the chunk; presumably in bytes, since the visible writer loop compares it against the hashfile offset delta (TODO confirm). */
uint64_t size; | ||
/* Function that writes this chunk's contents. */
chunk_write_fn write_fn; | ||
}; | ||
|
||
static int write_commit_graph_file(struct write_commit_graph_context *ctx) | ||
|
@@ -1539,7 +1580,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) | |
int num_chunks = 3; | ||
uint64_t chunk_offset; | ||
struct object_id file_hash; | ||
const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; | ||
struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; | ||
|
||
if (!ctx->bloom_settings) { | ||
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY", | ||
bloom_settings.bits_per_entry); | ||
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES", | ||
bloom_settings.num_hashes); | ||
ctx->bloom_settings = &bloom_settings; | ||
} | ||
|
||
if (ctx->split) { | ||
struct strbuf tmp_file = STRBUF_INIT; | ||
|
@@ -1579,27 +1628,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) | |
|
||
chunks[0].id = GRAPH_CHUNKID_OIDFANOUT; | ||
chunks[0].size = GRAPH_FANOUT_SIZE; | ||
chunks[0].write_fn = write_graph_chunk_fanout; | ||
chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP; | ||
chunks[1].size = hashsz * ctx->commits.nr; | ||
chunks[1].write_fn = write_graph_chunk_oids; | ||
chunks[2].id = GRAPH_CHUNKID_DATA; | ||
chunks[2].size = (hashsz + 16) * ctx->commits.nr; | ||
chunks[2].write_fn = write_graph_chunk_data; | ||
if (ctx->num_extra_edges) { | ||
chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES; | ||
chunks[num_chunks].size = 4 * ctx->num_extra_edges; | ||
chunks[num_chunks].write_fn = write_graph_chunk_extra_edges; | ||
num_chunks++; | ||
} | ||
if (ctx->changed_paths) { | ||
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES; | ||
chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr; | ||
chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes; | ||
num_chunks++; | ||
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA; | ||
chunks[num_chunks].size = sizeof(uint32_t) * 3 | ||
+ ctx->total_bloom_filter_data_size; | ||
chunks[num_chunks].write_fn = write_graph_chunk_bloom_data; | ||
num_chunks++; | ||
} | ||
if (ctx->num_commit_graphs_after > 1) { | ||
chunks[num_chunks].id = GRAPH_CHUNKID_BASE; | ||
chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1); | ||
chunks[num_chunks].write_fn = write_graph_chunk_base; | ||
num_chunks++; | ||
} | ||
|
||
|
@@ -1635,19 +1691,19 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) | |
progress_title.buf, | ||
num_chunks * ctx->commits.nr); | ||
} | ||
write_graph_chunk_fanout(f, ctx); | ||
write_graph_chunk_oids(f, hashsz, ctx); | ||
write_graph_chunk_data(f, hashsz, ctx); | ||
if (ctx->num_extra_edges) | ||
write_graph_chunk_extra_edges(f, ctx); | ||
if (ctx->changed_paths) { | ||
write_graph_chunk_bloom_indexes(f, ctx); | ||
write_graph_chunk_bloom_data(f, ctx, &bloom_settings); | ||
} | ||
if (ctx->num_commit_graphs_after > 1 && | ||
write_graph_chunk_base(f, ctx)) { | ||
return -1; | ||
|
||
for (i = 0; i < num_chunks; i++) { | ||
uint64_t start_offset = f->total + f->offset; | ||
|
||
if (chunks[i].write_fn(f, ctx)) | ||
return -1; | ||
|
||
if (f->total + f->offset != start_offset + chunks[i].size) | ||
BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead", | ||
chunks[i].size, chunks[i].id, | ||
f->total + f->offset - start_offset); | ||
} | ||
|
||
stop_progress(&ctx->progress); | ||
strbuf_release(&progress_title); | ||
|
||
|
@@ -1964,9 +2020,23 @@ int write_commit_graph(struct object_directory *odb, | |
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0; | ||
ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 1 : 0; | ||
ctx->split_opts = split_opts; | ||
ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0; | ||
ctx->total_bloom_filter_data_size = 0; | ||
|
||
if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS) | ||
ctx->changed_paths = 1; | ||
if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) { | ||
struct commit_graph *g; | ||
prepare_commit_graph_one(ctx->r, ctx->odb); | ||
|
||
g = ctx->r->objects->commit_graph; | ||
|
||
/* We have changed-paths already. Keep them in the next graph */ | ||
if (g && g->chunk_bloom_data) { | ||
ctx->changed_paths = 1; | ||
ctx->bloom_settings = g->bloom_filter_settings; | ||
} | ||
} | ||
|
||
if (ctx->split) { | ||
struct commit_graph *g; | ||
prepare_commit_graph(ctx->r); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On the Git mailing list, SZEDER Gábor wrote (reply to this):
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On the Git mailing list, Taylor Blau wrote (reply to this):
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On the Git mailing list, Derrick Stolee wrote (reply to this):
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On the Git mailing list, Taylor Blau wrote (reply to this):
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On the Git mailing list, Derrick Stolee wrote (reply to this):