Skip to content

Commit f70d861

Browse files
committed
Merge branch 'ds/full-name-hash' into seen
"git pack-objects" and its wrapper "git repack" learned an option to use an alternative path-hash function that improves delta-base selection when producing a packfile whose history is deeper than the window size.
* ds/full-name-hash:
- test-tool: add helper for name-hash values
- pack-objects: disable --full-name-hash when shallow
- p5313: add size comparison test
- git-repack: update usage to match docs
- pack-objects: add GIT_TEST_FULL_NAME_HASH
- repack: add --full-name-hash option
- pack-objects: add --full-name-hash option
2 parents 43d9ec5 + 991ae1c commit f70d861

23 files changed

+355
-13
lines changed

Documentation/git-pack-objects.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ SYNOPSIS
1515
[--revs [--unpacked | --all]] [--keep-pack=<pack-name>]
1616
[--cruft] [--cruft-expiration=<time>]
1717
[--stdout [--filter=<filter-spec>] | <base-name>]
18-
[--shallow] [--keep-true-parents] [--[no-]sparse] < <object-list>
18+
[--shallow] [--keep-true-parents] [--[no-]sparse]
19+
[--full-name-hash] < <object-list>
1920

2021

2122
DESCRIPTION

Documentation/git-repack.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ git-repack - Pack unpacked objects in a repository
99
SYNOPSIS
1010
--------
1111
[verse]
12-
'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m] [--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>] [--write-midx]
12+
'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]
13+
[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]
14+
[--write-midx] [--full-name-hash]
1315

1416
DESCRIPTION
1517
-----------

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,7 @@ TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o
822822
TEST_BUILTINS_OBJS += test-match-trees.o
823823
TEST_BUILTINS_OBJS += test-mergesort.o
824824
TEST_BUILTINS_OBJS += test-mktemp.o
825+
TEST_BUILTINS_OBJS += test-name-hash.o
825826
TEST_BUILTINS_OBJS += test-online-cpus.o
826827
TEST_BUILTINS_OBJS += test-pack-mtimes.o
827828
TEST_BUILTINS_OBJS += test-parse-options.o

builtin/pack-objects.c

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,14 @@ struct configured_exclusion {
267267
static struct oidmap configured_exclusions;
268268

269269
static struct oidset excluded_by_config;
270+
static int use_full_name_hash = -1;
271+
272+
static inline uint32_t pack_name_hash_fn(const char *name)
273+
{
274+
if (use_full_name_hash)
275+
return pack_full_name_hash(name);
276+
return pack_name_hash(name);
277+
}
270278

271279
/*
272280
* stats
@@ -1699,7 +1707,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
16991707
return 0;
17001708
}
17011709

1702-
create_object_entry(oid, type, pack_name_hash(name),
1710+
create_object_entry(oid, type, pack_name_hash_fn(name),
17031711
exclude, name && no_try_delta(name),
17041712
found_pack, found_offset);
17051713
return 1;
@@ -1913,7 +1921,7 @@ static void add_preferred_base_object(const char *name)
19131921
{
19141922
struct pbase_tree *it;
19151923
size_t cmplen;
1916-
unsigned hash = pack_name_hash(name);
1924+
unsigned hash = pack_name_hash_fn(name);
19171925

19181926
if (!num_preferred_base || check_pbase_path(hash))
19191927
return;
@@ -3423,7 +3431,7 @@ static void show_object_pack_hint(struct object *object, const char *name,
34233431
* here using a now in order to perhaps improve the delta selection
34243432
* process.
34253433
*/
3426-
oe->hash = pack_name_hash(name);
3434+
oe->hash = pack_name_hash_fn(name);
34273435
oe->no_try_delta = name && no_try_delta(name);
34283436

34293437
stdin_packs_hints_nr++;
@@ -3573,7 +3581,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
35733581
entry = packlist_find(&to_pack, oid);
35743582
if (entry) {
35753583
if (name) {
3576-
entry->hash = pack_name_hash(name);
3584+
entry->hash = pack_name_hash_fn(name);
35773585
entry->no_try_delta = no_try_delta(name);
35783586
}
35793587
} else {
@@ -3596,7 +3604,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
35963604
return;
35973605
}
35983606

3599-
entry = create_object_entry(oid, type, pack_name_hash(name),
3607+
entry = create_object_entry(oid, type, pack_name_hash_fn(name),
36003608
0, name && no_try_delta(name),
36013609
pack, offset);
36023610
}
@@ -4445,6 +4453,8 @@ int cmd_pack_objects(int argc,
44454453
OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
44464454
N_("protocol"),
44474455
N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
4456+
OPT_BOOL(0, "full-name-hash", &use_full_name_hash,
4457+
N_("optimize delta compression across identical path names over time")),
44484458
OPT_END(),
44494459
};
44504460

@@ -4600,6 +4610,20 @@ int cmd_pack_objects(int argc,
46004610
if (pack_to_stdout || !rev_list_all)
46014611
write_bitmap_index = 0;
46024612

4613+
if (use_full_name_hash < 0)
4614+
use_full_name_hash = git_env_bool("GIT_TEST_FULL_NAME_HASH", 0);
4615+
4616+
if (shallow && use_full_name_hash > 0 &&
4617+
!git_env_bool("GIT_TEST_USE_FULL_NAME_HASH_WITH_SHALLOW", 0)) {
4618+
use_full_name_hash = 0;
4619+
warning("the --full-name-hash option is disabled with the --shallow option");
4620+
}
4621+
4622+
if (write_bitmap_index && use_full_name_hash > 0) {
4623+
warning(_("currently, the --full-name-hash option is incompatible with --write-bitmap-index"));
4624+
use_full_name_hash = 0;
4625+
}
4626+
46034627
if (use_delta_islands)
46044628
strvec_push(&rp, "--topo-order");
46054629

builtin/repack.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ static int run_update_server_info = 1;
3939
static char *packdir, *packtmp_name, *packtmp;
4040

4141
static const char *const git_repack_usage[] = {
42-
N_("git repack [<options>]"),
42+
N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n"
43+
"[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]\n"
44+
"[--write-midx] [--full-name-hash]"),
4345
NULL
4446
};
4547

@@ -58,6 +60,7 @@ struct pack_objects_args {
5860
int no_reuse_object;
5961
int quiet;
6062
int local;
63+
int full_name_hash;
6164
struct list_objects_filter_options filter_options;
6265
};
6366

@@ -306,6 +309,8 @@ static void prepare_pack_objects(struct child_process *cmd,
306309
strvec_pushf(&cmd->args, "--no-reuse-delta");
307310
if (args->no_reuse_object)
308311
strvec_pushf(&cmd->args, "--no-reuse-object");
312+
if (args->full_name_hash)
313+
strvec_pushf(&cmd->args, "--full-name-hash");
309314
if (args->local)
310315
strvec_push(&cmd->args, "--local");
311316
if (args->quiet)
@@ -1203,6 +1208,8 @@ int cmd_repack(int argc,
12031208
N_("pass --no-reuse-delta to git-pack-objects")),
12041209
OPT_BOOL('F', NULL, &po_args.no_reuse_object,
12051210
N_("pass --no-reuse-object to git-pack-objects")),
1211+
OPT_BOOL(0, "full-name-hash", &po_args.full_name_hash,
1212+
N_("pass --full-name-hash to git-pack-objects")),
12061213
OPT_NEGBIT('n', NULL, &run_update_server_info,
12071214
N_("do not run git-update-server-info"), 1),
12081215
OPT__QUIET(&po_args.quiet, N_("be quiet")),

ci/run-build-and-tests.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ linux-TEST-vars)
3030
export GIT_TEST_NO_WRITE_REV_INDEX=1
3131
export GIT_TEST_CHECKOUT_WORKERS=2
3232
export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1
33+
export GIT_TEST_FULL_NAME_HASH=1
3334
;;
3435
linux-clang)
3536
export GIT_TEST_DEFAULT_HASH=sha1

pack-objects.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,27 @@ static inline uint32_t pack_name_hash(const char *name)
207207
return hash;
208208
}
209209

210+
/*
 * Compute a 32-bit hash over every byte of the NUL-terminated path
 * 'name'.
 *
 * Returns 0 when 'name' is NULL. For an empty string the result is the
 * seed value itself, since no byte is mixed in.
 *
 * The bytes are read as 'unsigned char': the signedness of plain 'char'
 * is implementation-defined, and reading bytes >= 0x80 through a signed
 * 'char' would sign-extend them, making the hash of non-ASCII paths
 * differ from one platform to another.
 */
static inline uint32_t pack_full_name_hash(const char *name)
{
	const uint32_t bigp = 1234572167U;
	const unsigned char *p = (const unsigned char *)name;
	uint32_t c, hash = bigp;

	if (!name)
		return 0;

	/*
	 * Do the simplest thing that will resemble pseudo-randomness: add
	 * random multiples of a large prime number with a binary shift.
	 * The goal is not to be cryptographic, but to be generally
	 * uniformly distributed.
	 */
	while ((c = *p++) != 0) {
		hash += c * bigp;
		/* rotate right by 5 bits so earlier bytes keep influencing the result */
		hash = (hash >> 5) | (hash << 27);
	}
	return hash;
}
230+
210231
static inline enum object_type oe_type(const struct object_entry *e)
211232
{
212233
return e->type_valid ? e->type_ : OBJ_BAD;

t/README

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,10 @@ a test and then fails then the whole test run will abort. This can help to make
492492
sure the expected tests are executed and not silently skipped when their
493493
dependency breaks or is simply not present in a new environment.
494494

495+
GIT_TEST_FULL_NAME_HASH=<boolean>, when true, sets the default name-hash
496+
function in 'git pack-objects' to be the one used by the --full-name-hash
497+
option.
498+
495499
Naming Tests
496500
------------
497501

t/helper/test-name-hash.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* test-name-hash.c: Read a list of paths over stdin and report on their
3+
* name-hash and full name-hash.
4+
*/
5+
6+
#include "test-tool.h"
7+
#include "git-compat-util.h"
8+
#include "pack-objects.h"
9+
#include "strbuf.h"
10+
11+
/*
 * Entry point of the "name-hash" test-tool command.
 *
 * Reads lines from stdin until strbuf_getline() returns non-zero and,
 * for each line, prints three tab-separated columns: the value of
 * pack_name_hash(), the value of pack_full_name_hash(), and the line
 * itself. Both arguments are unused. Always returns 0.
 */
int cmd__name_hash(int argc UNUSED, const char **argv UNUSED)
{
	struct strbuf path = STRBUF_INIT;

	for (;;) {
		uint32_t legacy_hash, full_hash;

		if (strbuf_getline(&path, stdin))
			break;

		legacy_hash = pack_name_hash(path.buf);
		full_hash = pack_full_name_hash(path.buf);

		printf("%10"PRIu32"\t%10"PRIu32"\t%s\n", legacy_hash, full_hash, path.buf);
	}

	strbuf_release(&path);
	return 0;
}

t/helper/test-tool.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ static struct test_cmd cmds[] = {
4444
{ "match-trees", cmd__match_trees },
4545
{ "mergesort", cmd__mergesort },
4646
{ "mktemp", cmd__mktemp },
47+
{ "name-hash", cmd__name_hash },
4748
{ "online-cpus", cmd__online_cpus },
4849
{ "pack-mtimes", cmd__pack_mtimes },
4950
{ "parse-options", cmd__parse_options },

t/helper/test-tool.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ int cmd__lazy_init_name_hash(int argc, const char **argv);
3737
int cmd__match_trees(int argc, const char **argv);
3838
int cmd__mergesort(int argc, const char **argv);
3939
int cmd__mktemp(int argc, const char **argv);
40+
int cmd__name_hash(int argc, const char **argv);
4041
int cmd__online_cpus(int argc, const char **argv);
4142
int cmd__pack_mtimes(int argc, const char **argv);
4243
int cmd__parse_options(int argc, const char **argv);

t/perf/p5313-pack-objects.sh

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#!/bin/sh

# Compares the run time and the resulting pack size of 'git pack-objects'
# and 'git repack' with and without the --full-name-hash option. Each
# test_perf writes its pack to "out" (or repacks the repository) and the
# test_size that follows reports the size of that result.

test_description='Tests pack-objects performance with and without --full-name-hash'
. ./perf-lib.sh

GIT_TEST_PASSING_SANITIZE_LEAK=0
export GIT_TEST_PASSING_SANITIZE_LEAK

test_perf_large_repo

# Revision inputs for "pack-objects --revs":
#   in-thin:    only HEAD, excluding HEAD~1
#   in-big:     HEAD, excluding HEAD~1000
#   in-shallow: HEAD, with HEAD itself marked as a shallow boundary
test_expect_success 'create rev input' '
	cat >in-thin <<-EOF &&
	$(git rev-parse HEAD)
	^$(git rev-parse HEAD~1)
	EOF

	cat >in-big <<-EOF &&
	$(git rev-parse HEAD)
	^$(git rev-parse HEAD~1000)
	EOF

	cat >in-shallow <<-EOF
	$(git rev-parse HEAD)
	--shallow $(git rev-parse HEAD)
	EOF
'

test_perf 'thin pack' '
	git pack-objects --thin --stdout --revs --sparse <in-thin >out
'

test_size 'thin pack size' '
	test_file_size out
'

test_perf 'thin pack with --full-name-hash' '
	git pack-objects --thin --stdout --revs --sparse --full-name-hash <in-thin >out
'

test_size 'thin pack size with --full-name-hash' '
	test_file_size out
'

test_perf 'big pack' '
	git pack-objects --stdout --revs --sparse <in-big >out
'

test_size 'big pack size' '
	test_file_size out
'

test_perf 'big pack with --full-name-hash' '
	git pack-objects --stdout --revs --sparse --full-name-hash <in-big >out
'

test_size 'big pack size with --full-name-hash' '
	test_file_size out
'

test_perf 'shallow fetch pack' '
	git pack-objects --stdout --revs --sparse --shallow <in-shallow >out
'

test_size 'shallow pack size' '
	test_file_size out
'

# pack-objects turns --full-name-hash off when --shallow is given unless
# GIT_TEST_USE_FULL_NAME_HASH_WITH_SHALLOW is true, so set it here to
# measure the combination.
test_perf 'shallow pack with --full-name-hash' '
	GIT_TEST_USE_FULL_NAME_HASH_WITH_SHALLOW=1 \
		git pack-objects --stdout --revs --sparse --shallow --full-name-hash <in-shallow >out
'

test_size 'shallow pack size with --full-name-hash' '
	test_file_size out
'

test_perf 'repack' '
	git repack -adf
'

test_size 'repack size' '
	pack=$(ls .git/objects/pack/pack-*.pack) &&
	test_file_size "$pack"
'

test_perf 'repack with --full-name-hash' '
	git repack -adf --full-name-hash
'

test_size 'repack size with --full-name-hash' '
	pack=$(ls .git/objects/pack/pack-*.pack) &&
	test_file_size "$pack"
'

test_done

t/perf/p5314-name-hash.sh

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/bin/sh

# Reports how well the two name-hash functions spread the paths found at
# HEAD: the number of distinct hash values each one produces and the
# largest number of paths that share a single hash value.
#
# The tests depend on each other: "path-list", "name-hashes",
# "name-hash-count" and "full-name-hash-count" are written by earlier
# tests and read by later ones.

test_description='Tests the distribution of name-hash and full-name-hash values'
. ./perf-lib.sh

GIT_TEST_PASSING_SANITIZE_LEAK=0
export GIT_TEST_PASSING_SANITIZE_LEAK

test_perf_large_repo

test_size 'paths at head' '
	git ls-tree -r --name-only HEAD >path-list &&
	wc -l <path-list
'

# "test-tool name-hash" prints "<name-hash> <full-name-hash> <path>" per
# input line, so column 1 is the name-hash and column 2 the full one.
test_size 'number of distinct name-hashes' '
	test-tool name-hash <path-list >name-hashes &&
	awk "{ print \$1; }" <name-hashes | sort -n | uniq -c >name-hash-count &&
	wc -l <name-hash-count
'

test_size 'number of distinct full-name-hashes' '
	awk "{ print \$2; }" <name-hashes | sort -n | uniq -c >full-name-hash-count &&
	wc -l <full-name-hash-count
'

# The *-count files hold "uniq -c" output ("<count> <hash>"); the largest
# count is the worst-case number of paths colliding on one hash value.
test_size 'maximum multiplicity of name-hashes' '
	sort -nr <name-hash-count |
	head -n 1 |
	awk "{ print \$1; }"
'

test_size 'maximum multiplicity of fullname-hashes' '
	sort -nr <full-name-hash-count |
	head -n 1 |
	awk "{ print \$1; }"
'

test_done

t/t0450/txt-help-mismatches

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ rebase
4545
remote
4646
remote-ext
4747
remote-fd
48-
repack
4948
reset
5049
restore
5150
rev-parse

0 commit comments

Comments
 (0)