Skip to content

Commit 70cdbbe

Browse files
committed
Merge branch 'ds/commit-graph-bloom-updates' into master
Updates to the changed-paths bloom filter. * ds/commit-graph-bloom-updates: commit-graph: check all leading directories in changed path Bloom filters revision: empty pathspecs should not use Bloom filters revision.c: fix whitespace commit-graph: check chunk sizes after writing commit-graph: simplify chunk writes into loop commit-graph: unify the signatures of all write_graph_chunk_*() functions commit-graph: persist existence of changed-paths bloom: fix logic in get_bloom_filter() commit-graph: change test to die on parse, not load commit-graph: place bloom_settings in context
2 parents de6dda0 + c525ce9 commit 70cdbbe

9 files changed

+217
-74
lines changed

Documentation/git-commit-graph.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,10 @@ existing commit-graph file.
6262
With the `--changed-paths` option, compute and write information about the
6363
paths changed between a commit and its first parent. This operation can
6464
take a while on large repositories. It provides significant performance gains
65-
for getting history of a directory or a file with `git log -- <path>`.
65+
for getting history of a directory or a file with `git log -- <path>`. If
66+
this option is given, future commit-graph writes will automatically assume
67+
that this option was intended. Use `--no-changed-paths` to stop storing this
68+
data.
6669
+
6770
With the `--split[=<strategy>]` option, write the commit-graph as a
6871
chain of multiple commit-graph files stored in

bloom.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -187,24 +187,22 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
187187
struct diff_options diffopt;
188188
int max_changes = 512;
189189

190-
if (bloom_filters.slab_size == 0)
190+
if (!bloom_filters.slab_size)
191191
return NULL;
192192

193193
filter = bloom_filter_slab_at(&bloom_filters, c);
194194

195195
if (!filter->data) {
196196
load_commit_graph_info(r, c);
197197
if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH &&
198-
r->objects->commit_graph->chunk_bloom_indexes) {
199-
if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
200-
return filter;
201-
else
202-
return NULL;
203-
}
198+
r->objects->commit_graph->chunk_bloom_indexes)
199+
load_bloom_filter_from_graph(r->objects->commit_graph, filter, c);
204200
}
205201

206-
if (filter->data || !compute_if_not_present)
202+
if (filter->data)
207203
return filter;
204+
if (!compute_if_not_present)
205+
return NULL;
208206

209207
repo_diff_setup(r, &diffopt);
210208
diffopt.flags.recursive = 1;

builtin/commit-graph.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ static int graph_write(int argc, const char **argv)
201201
};
202202

203203
opts.progress = isatty(2);
204+
opts.enable_changed_paths = -1;
204205
split_opts.size_multiple = 2;
205206
split_opts.max_commits = 0;
206207
split_opts.expire_time = 0;
@@ -221,7 +222,9 @@ static int graph_write(int argc, const char **argv)
221222
flags |= COMMIT_GRAPH_WRITE_SPLIT;
222223
if (opts.progress)
223224
flags |= COMMIT_GRAPH_WRITE_PROGRESS;
224-
if (opts.enable_changed_paths ||
225+
if (!opts.enable_changed_paths)
226+
flags |= COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS;
227+
if (opts.enable_changed_paths == 1 ||
225228
git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
226229
flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
227230

commit-graph.c

Lines changed: 110 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include "bloom.h"
1818
#include "commit-slab.h"
1919
#include "shallow.h"
20+
#include "json-writer.h"
21+
#include "trace2.h"
2022

2123
void git_test_write_commit_graph_or_die(void)
2224
{
@@ -617,10 +619,6 @@ static int prepare_commit_graph(struct repository *r)
617619
return !!r->objects->commit_graph;
618620
r->objects->commit_graph_attempted = 1;
619621

620-
if (git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD, 0))
621-
die("dying as requested by the '%s' variable on commit-graph load!",
622-
GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD);
623-
624622
prepare_repo_settings(r);
625623

626624
if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
@@ -849,6 +847,14 @@ static int parse_commit_in_graph_one(struct repository *r,
849847

850848
int parse_commit_in_graph(struct repository *r, struct commit *item)
851849
{
850+
static int checked_env = 0;
851+
852+
if (!checked_env &&
853+
git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0))
854+
die("dying as requested by the '%s' variable on commit-graph parse!",
855+
GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE);
856+
checked_env = 1;
857+
852858
if (!prepare_commit_graph(r))
853859
return 0;
854860
return parse_commit_in_graph_one(r, r->objects->commit_graph, item);
@@ -941,10 +947,11 @@ struct write_commit_graph_context {
941947

942948
const struct split_commit_graph_opts *split_opts;
943949
size_t total_bloom_filter_data_size;
950+
const struct bloom_filter_settings *bloom_settings;
944951
};
945952

946-
static void write_graph_chunk_fanout(struct hashfile *f,
947-
struct write_commit_graph_context *ctx)
953+
static int write_graph_chunk_fanout(struct hashfile *f,
954+
struct write_commit_graph_context *ctx)
948955
{
949956
int i, count = 0;
950957
struct commit **list = ctx->commits.list;
@@ -965,17 +972,21 @@ static void write_graph_chunk_fanout(struct hashfile *f,
965972

966973
hashwrite_be32(f, count);
967974
}
975+
976+
return 0;
968977
}
969978

970-
static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
971-
struct write_commit_graph_context *ctx)
979+
static int write_graph_chunk_oids(struct hashfile *f,
980+
struct write_commit_graph_context *ctx)
972981
{
973982
struct commit **list = ctx->commits.list;
974983
int count;
975984
for (count = 0; count < ctx->commits.nr; count++, list++) {
976985
display_progress(ctx->progress, ++ctx->progress_cnt);
977-
hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
986+
hashwrite(f, (*list)->object.oid.hash, the_hash_algo->rawsz);
978987
}
988+
989+
return 0;
979990
}
980991

981992
static const unsigned char *commit_to_sha1(size_t index, void *table)
@@ -984,8 +995,8 @@ static const unsigned char *commit_to_sha1(size_t index, void *table)
984995
return commits[index]->object.oid.hash;
985996
}
986997

987-
static void write_graph_chunk_data(struct hashfile *f, int hash_len,
988-
struct write_commit_graph_context *ctx)
998+
static int write_graph_chunk_data(struct hashfile *f,
999+
struct write_commit_graph_context *ctx)
9891000
{
9901001
struct commit **list = ctx->commits.list;
9911002
struct commit **last = ctx->commits.list + ctx->commits.nr;
@@ -1002,7 +1013,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
10021013
die(_("unable to parse commit %s"),
10031014
oid_to_hex(&(*list)->object.oid));
10041015
tree = get_commit_tree_oid(*list);
1005-
hashwrite(f, tree->hash, hash_len);
1016+
hashwrite(f, tree->hash, the_hash_algo->rawsz);
10061017

10071018
parent = (*list)->parents;
10081019

@@ -1082,10 +1093,12 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
10821093

10831094
list++;
10841095
}
1096+
1097+
return 0;
10851098
}
10861099

1087-
static void write_graph_chunk_extra_edges(struct hashfile *f,
1088-
struct write_commit_graph_context *ctx)
1100+
static int write_graph_chunk_extra_edges(struct hashfile *f,
1101+
struct write_commit_graph_context *ctx)
10891102
{
10901103
struct commit **list = ctx->commits.list;
10911104
struct commit **last = ctx->commits.list + ctx->commits.nr;
@@ -1134,41 +1147,67 @@ static void write_graph_chunk_extra_edges(struct hashfile *f,
11341147

11351148
list++;
11361149
}
1150+
1151+
return 0;
11371152
}
11381153

1139-
static void write_graph_chunk_bloom_indexes(struct hashfile *f,
1140-
struct write_commit_graph_context *ctx)
1154+
static int write_graph_chunk_bloom_indexes(struct hashfile *f,
1155+
struct write_commit_graph_context *ctx)
11411156
{
11421157
struct commit **list = ctx->commits.list;
11431158
struct commit **last = ctx->commits.list + ctx->commits.nr;
11441159
uint32_t cur_pos = 0;
11451160

11461161
while (list < last) {
11471162
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
1148-
cur_pos += filter->len;
1163+
size_t len = filter ? filter->len : 0;
1164+
cur_pos += len;
11491165
display_progress(ctx->progress, ++ctx->progress_cnt);
11501166
hashwrite_be32(f, cur_pos);
11511167
list++;
11521168
}
1169+
1170+
return 0;
11531171
}
11541172

1155-
static void write_graph_chunk_bloom_data(struct hashfile *f,
1156-
struct write_commit_graph_context *ctx,
1157-
const struct bloom_filter_settings *settings)
1173+
static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
1174+
{
1175+
struct json_writer jw = JSON_WRITER_INIT;
1176+
1177+
jw_object_begin(&jw, 0);
1178+
jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
1179+
jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
1180+
jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
1181+
jw_end(&jw);
1182+
1183+
trace2_data_json("bloom", ctx->r, "settings", &jw);
1184+
1185+
jw_release(&jw);
1186+
}
1187+
1188+
static int write_graph_chunk_bloom_data(struct hashfile *f,
1189+
struct write_commit_graph_context *ctx)
11581190
{
11591191
struct commit **list = ctx->commits.list;
11601192
struct commit **last = ctx->commits.list + ctx->commits.nr;
11611193

1162-
hashwrite_be32(f, settings->hash_version);
1163-
hashwrite_be32(f, settings->num_hashes);
1164-
hashwrite_be32(f, settings->bits_per_entry);
1194+
trace2_bloom_filter_settings(ctx);
1195+
1196+
hashwrite_be32(f, ctx->bloom_settings->hash_version);
1197+
hashwrite_be32(f, ctx->bloom_settings->num_hashes);
1198+
hashwrite_be32(f, ctx->bloom_settings->bits_per_entry);
11651199

11661200
while (list < last) {
11671201
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
1202+
size_t len = filter ? filter->len : 0;
1203+
11681204
display_progress(ctx->progress, ++ctx->progress_cnt);
1169-
hashwrite(f, filter->data, filter->len * sizeof(unsigned char));
1205+
if (len)
1206+
hashwrite(f, filter->data, len * sizeof(unsigned char));
11701207
list++;
11711208
}
1209+
1210+
return 0;
11721211
}
11731212

11741213
static int oid_compare(const void *_a, const void *_b)
@@ -1579,9 +1618,13 @@ static int write_graph_chunk_base(struct hashfile *f,
15791618
return 0;
15801619
}
15811620

1621+
typedef int (*chunk_write_fn)(struct hashfile *f,
1622+
struct write_commit_graph_context *ctx);
1623+
15821624
struct chunk_info {
15831625
uint32_t id;
15841626
uint64_t size;
1627+
chunk_write_fn write_fn;
15851628
};
15861629

15871630
static int write_commit_graph_file(struct write_commit_graph_context *ctx)
@@ -1596,7 +1639,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
15961639
int num_chunks = 3;
15971640
uint64_t chunk_offset;
15981641
struct object_id file_hash;
1599-
const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
1642+
struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
1643+
1644+
if (!ctx->bloom_settings) {
1645+
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
1646+
bloom_settings.bits_per_entry);
1647+
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
1648+
bloom_settings.num_hashes);
1649+
ctx->bloom_settings = &bloom_settings;
1650+
}
16001651

16011652
if (ctx->split) {
16021653
struct strbuf tmp_file = STRBUF_INIT;
@@ -1644,27 +1695,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
16441695

16451696
chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
16461697
chunks[0].size = GRAPH_FANOUT_SIZE;
1698+
chunks[0].write_fn = write_graph_chunk_fanout;
16471699
chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
16481700
chunks[1].size = hashsz * ctx->commits.nr;
1701+
chunks[1].write_fn = write_graph_chunk_oids;
16491702
chunks[2].id = GRAPH_CHUNKID_DATA;
16501703
chunks[2].size = (hashsz + 16) * ctx->commits.nr;
1704+
chunks[2].write_fn = write_graph_chunk_data;
16511705
if (ctx->num_extra_edges) {
16521706
chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
16531707
chunks[num_chunks].size = 4 * ctx->num_extra_edges;
1708+
chunks[num_chunks].write_fn = write_graph_chunk_extra_edges;
16541709
num_chunks++;
16551710
}
16561711
if (ctx->changed_paths) {
16571712
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
16581713
chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
1714+
chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes;
16591715
num_chunks++;
16601716
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
16611717
chunks[num_chunks].size = sizeof(uint32_t) * 3
16621718
+ ctx->total_bloom_filter_data_size;
1719+
chunks[num_chunks].write_fn = write_graph_chunk_bloom_data;
16631720
num_chunks++;
16641721
}
16651722
if (ctx->num_commit_graphs_after > 1) {
16661723
chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
16671724
chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
1725+
chunks[num_chunks].write_fn = write_graph_chunk_base;
16681726
num_chunks++;
16691727
}
16701728

@@ -1700,19 +1758,19 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
17001758
progress_title.buf,
17011759
num_chunks * ctx->commits.nr);
17021760
}
1703-
write_graph_chunk_fanout(f, ctx);
1704-
write_graph_chunk_oids(f, hashsz, ctx);
1705-
write_graph_chunk_data(f, hashsz, ctx);
1706-
if (ctx->num_extra_edges)
1707-
write_graph_chunk_extra_edges(f, ctx);
1708-
if (ctx->changed_paths) {
1709-
write_graph_chunk_bloom_indexes(f, ctx);
1710-
write_graph_chunk_bloom_data(f, ctx, &bloom_settings);
1711-
}
1712-
if (ctx->num_commit_graphs_after > 1 &&
1713-
write_graph_chunk_base(f, ctx)) {
1714-
return -1;
1761+
1762+
for (i = 0; i < num_chunks; i++) {
1763+
uint64_t start_offset = f->total + f->offset;
1764+
1765+
if (chunks[i].write_fn(f, ctx))
1766+
return -1;
1767+
1768+
if (f->total + f->offset != start_offset + chunks[i].size)
1769+
BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
1770+
chunks[i].size, chunks[i].id,
1771+
f->total + f->offset - start_offset);
17151772
}
1773+
17161774
stop_progress(&ctx->progress);
17171775
strbuf_release(&progress_title);
17181776

@@ -2046,9 +2104,23 @@ int write_commit_graph(struct object_directory *odb,
20462104
ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
20472105
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
20482106
ctx->split_opts = split_opts;
2049-
ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0;
20502107
ctx->total_bloom_filter_data_size = 0;
20512108

2109+
if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
2110+
ctx->changed_paths = 1;
2111+
if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
2112+
struct commit_graph *g;
2113+
prepare_commit_graph_one(ctx->r, ctx->odb);
2114+
2115+
g = ctx->r->objects->commit_graph;
2116+
2117+
/* We have changed-paths already. Keep them in the next graph */
2118+
if (g && g->chunk_bloom_data) {
2119+
ctx->changed_paths = 1;
2120+
ctx->bloom_settings = g->bloom_filter_settings;
2121+
}
2122+
}
2123+
20522124
if (ctx->split) {
20532125
struct commit_graph *g;
20542126
prepare_commit_graph(ctx->r);

commit-graph.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#include "oidset.h"
77

88
#define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH"
9-
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD"
9+
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE"
1010
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
1111

1212
/*
@@ -92,6 +92,7 @@ enum commit_graph_write_flags {
9292
COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1),
9393
COMMIT_GRAPH_WRITE_SPLIT = (1 << 2),
9494
COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3),
95+
COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 4),
9596
};
9697

9798
enum commit_graph_split_flags {

0 commit comments

Comments
 (0)