Skip to content

Commit 6eb3258

Browse files
committed
pack-objects: allow --path-walk with --shallow (#699)
This pull request aims to correct a pretty big issue when dealing with UNINTERESTING objects in the path-walk API. They somehow were only exposed when trying to perform a push from a shallow clone. This will require rewriting the upstream version so this is avoided from the start, but we can do a forward fix for now. The key issue is that the path-walk API was not walking UNINTERESTING trees at the right time, and the way it was being done was more complicated than it needed to be. This changes some of the way the path-walk API works in the presence of UNINTERSTING commits, but these are good changes to make. I had briefly attempted to remove the use of the `edge_aggressive` option in `struct path_walk_info` in favor of using the `--objects-edge-aggressive` option in the revision struct. When I started down that road, though, I somehow got myself into a bind of things not working correctly. I backed out to this version that is working with our test cases. I tested this using the thin and big pack tests in `p5313` which had the same performance as before this change. The new change is that in a shallow clone we can get the same `git push` improvements. I was hung up on testing this for a long time as I wasn't getting the same results in my shallow clone as in my regular clones. It turns out that I had forgotten to use `--no-reuse-delta` in my test command, so it was picking the deltas that were given by the initial clone instead of picking new ones per the algorithm. 🤦🏻
2 parents 51f47a6 + 19094f4 commit 6eb3258

File tree

7 files changed

+96
-38
lines changed

7 files changed

+96
-38
lines changed

Documentation/technical/api-path-walk.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,14 @@ better off using the revision walk API instead.
6565
the revision walk so that the walk emits commits marked with the
6666
`UNINTERESTING` flag.
6767

68+
`edge_aggressive`::
69+
For performance reasons, usually only the boundary commits are
70+
explored to find UNINTERESTING objects. However, in the case of
71+
shallow clones it can be helpful to mark all trees and blobs
72+
reachable from UNINTERESTING tip commits as UNINTERESTING. This
73+
matches the behavior of `--objects-edge-aggressive` in the
74+
revision API.
75+
6876
`pl`::
6977
This pattern list pointer allows focusing the path-walk search to
7078
a set of patterns, only emitting paths that match the given

builtin/pack-objects.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ static int keep_unreachable, unpack_unreachable, include_tag;
203203
static timestamp_t unpack_unreachable_expiration;
204204
static int pack_loose_unreachable;
205205
static int cruft;
206+
static int shallow = 0;
206207
static timestamp_t cruft_expiration;
207208
static int local;
208209
static int have_non_local_packs;
@@ -4429,6 +4430,7 @@ static void get_object_list_path_walk(struct rev_info *revs)
44294430
* base objects.
44304431
*/
44314432
info.prune_all_uninteresting = sparse;
4433+
info.edge_aggressive = shallow;
44324434

44334435
if (walk_objects_by_path(&info))
44344436
die(_("failed to pack objects via path-walk"));
@@ -4630,7 +4632,6 @@ int cmd_pack_objects(int argc,
46304632
struct repository *repo UNUSED)
46314633
{
46324634
int use_internal_rev_list = 0;
4633-
int shallow = 0;
46344635
int all_progress_implied = 0;
46354636
struct strvec rp = STRVEC_INIT;
46364637
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
@@ -4818,10 +4819,6 @@ int cmd_pack_objects(int argc,
48184819
warning(_("cannot use delta islands with --path-walk"));
48194820
path_walk = 0;
48204821
}
4821-
if (path_walk && shallow) {
4822-
warning(_("cannot use --shallow with --path-walk"));
4823-
path_walk = 0;
4824-
}
48254822
if (path_walk) {
48264823
strvec_push(&rp, "--boundary");
48274824
/*

path-walk.c

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "trace2.h"
1919
#include "tree.h"
2020
#include "tree-walk.h"
21+
#include "list-objects.h"
2122

2223
struct type_and_oid_list
2324
{
@@ -233,6 +234,26 @@ static void clear_strmap(struct strmap *map)
233234
strmap_init(map);
234235
}
235236

237+
static struct repository *edge_repo;
238+
static struct type_and_oid_list *edge_tree_list;
239+
240+
static void show_edge(struct commit *commit)
241+
{
242+
struct tree *t = repo_get_commit_tree(edge_repo, commit);
243+
244+
if (!t)
245+
return;
246+
247+
if (commit->object.flags & UNINTERESTING)
248+
t->object.flags |= UNINTERESTING;
249+
250+
if (t->object.flags & SEEN)
251+
return;
252+
t->object.flags |= SEEN;
253+
254+
oid_array_append(&edge_tree_list->oids, &t->object.oid);
255+
}
256+
236257
/**
237258
* Given the configuration of 'info', walk the commits based on 'info->revs' and
238259
* call 'info->path_fn' on each discovered path.
@@ -242,7 +263,7 @@ static void clear_strmap(struct strmap *map)
242263
int walk_objects_by_path(struct path_walk_info *info)
243264
{
244265
const char *root_path = "";
245-
int ret = 0, has_uninteresting = 0;
266+
int ret = 0;
246267
size_t commits_nr = 0, paths_nr = 0;
247268
struct commit *c;
248269
struct type_and_oid_list *root_tree_list;
@@ -254,7 +275,6 @@ int walk_objects_by_path(struct path_walk_info *info)
254275
.path_stack = STRING_LIST_INIT_DUP,
255276
.paths_to_lists = STRMAP_INIT
256277
};
257-
struct oidset root_tree_set = OIDSET_INIT;
258278

259279
trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
260280

@@ -280,6 +300,18 @@ int walk_objects_by_path(struct path_walk_info *info)
280300
if (prepare_revision_walk(info->revs))
281301
die(_("failed to setup revision walk"));
282302

303+
/*
304+
* Do an initial walk of tip commits in info->revs->commits and
305+
* info->revs->cmdline.rev to match the standard edge-walk behavior.
306+
*
307+
* This is particularly important when 'edge_aggressive' is set.
308+
*/
309+
info->revs->edge_hint_aggressive = info->edge_aggressive;
310+
311+
edge_repo = info->revs->repo;
312+
edge_tree_list = root_tree_list;
313+
mark_edges_uninteresting(info->revs, show_edge, info->prune_all_uninteresting);
314+
283315
info->revs->blob_objects = info->revs->tree_objects = 0;
284316

285317
if (info->tags) {
@@ -366,17 +398,10 @@ int walk_objects_by_path(struct path_walk_info *info)
366398
if (t->object.flags & SEEN)
367399
continue;
368400
t->object.flags |= SEEN;
369-
370-
if (!oidset_insert(&root_tree_set, oid))
371-
oid_array_append(&root_tree_list->oids, oid);
401+
oid_array_append(&root_tree_list->oids, oid);
372402
} else {
373403
warning("could not find tree %s", oid_to_hex(oid));
374404
}
375-
376-
if (t && (c->object.flags & UNINTERESTING)) {
377-
t->object.flags |= UNINTERESTING;
378-
has_uninteresting = 1;
379-
}
380405
}
381406

382407
trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
@@ -389,21 +414,6 @@ int walk_objects_by_path(struct path_walk_info *info)
389414
oid_array_clear(&commit_list->oids);
390415
free(commit_list);
391416

392-
/*
393-
* Before performing a DFS of our paths and emitting them as interesting,
394-
* do a full walk of the trees to distribute the UNINTERESTING bit. Use
395-
* the sparse algorithm if prune_all_uninteresting was set.
396-
*/
397-
if (has_uninteresting) {
398-
trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo);
399-
if (info->prune_all_uninteresting)
400-
mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set);
401-
else
402-
mark_trees_uninteresting_dense(ctx.repo, &root_tree_set);
403-
trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo);
404-
}
405-
oidset_clear(&root_tree_set);
406-
407417
string_list_append(&ctx.path_stack, root_path);
408418

409419
trace2_region_enter("path-walk", "path-walk", info->revs->repo);

path-walk.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@ struct path_walk_info {
4848
*/
4949
int prune_all_uninteresting;
5050

51+
/**
52+
* When 'edge_aggressive' is set, then the revision walk will use
53+
* the '--object-edge-aggressive' option to mark even more objects
54+
* as uninteresting.
55+
*/
56+
int edge_aggressive;
57+
5158
/**
5259
* Specify a sparse-checkout definition to match our paths to. Do not
5360
* walk outside of this sparse definition. If the patterns are in

t/t5538-push-shallow.sh

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,4 +123,38 @@ EOF
123123
git cat-file blob $(echo 1|git hash-object --stdin) >/dev/null
124124
)
125125
'
126+
127+
test_expect_success 'push new commit from shallow clone has correct object count' '
128+
git init origin &&
129+
test_commit -C origin a &&
130+
test_commit -C origin b &&
131+
132+
git clone --depth=1 "file://$(pwd)/origin" client &&
133+
git -C client checkout -b topic &&
134+
git -C client commit --allow-empty -m "empty" &&
135+
GIT_PROGRESS_DELAY=0 git -C client push --progress origin topic 2>err &&
136+
test_grep "Enumerating objects: 1, done." err
137+
'
138+
139+
test_expect_success 'push new commit from shallow clone has good deltas' '
140+
git init base &&
141+
test_seq 1 999 >base/a &&
142+
test_commit -C base initial &&
143+
git -C base add a &&
144+
git -C base commit -m "big a" &&
145+
146+
git clone --depth=1 "file://$(pwd)/base" deltas &&
147+
git -C deltas checkout -b deltas &&
148+
test_seq 1 1000 >deltas/a &&
149+
git -C deltas commit -a -m "bigger a" &&
150+
GIT_TRACE2_PERF="$(pwd)/trace.txt" \
151+
GIT_PROGRESS_DELAY=0 git -C deltas push --progress origin deltas 2>err &&
152+
153+
test_grep "Enumerating objects: 5, done" err &&
154+
155+
# If the delta base is found, then this message uses "bytes".
156+
# If the delta base is not found, then this message uses "KiB".
157+
test_grep "Writing objects: .* bytes" err
158+
'
159+
126160
test_done

t/t5616-partial-clone.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ test_expect_success 'fetch lazy-fetches only to resolve deltas' '
526526
# used as delta bases!
527527
GIT_TRACE_PACKET="$(pwd)/trace" \
528528
GIT_TEST_FULL_NAME_HASH=0 \
529+
GIT_TEST_PACK_PATH_WALK=0 \
529530
git -C client \
530531
fetch "file://$(pwd)/server" main &&
531532
@@ -556,6 +557,7 @@ test_expect_success 'fetch lazy-fetches only to resolve deltas, protocol v2' '
556557
# used as delta bases!
557558
GIT_TRACE_PACKET="$(pwd)/trace" \
558559
GIT_TEST_FULL_NAME_HASH=0 \
560+
GIT_TEST_PACK_PATH_WALK=0 \
559561
git -C client \
560562
fetch "file://$(pwd)/server" main &&
561563

t/t6601-path-walk.sh

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -181,13 +181,13 @@ test_expect_success 'topic, not base' '
181181
COMMIT::$(git rev-parse topic)
182182
commits:1
183183
TREE::$(git rev-parse topic^{tree})
184-
TREE:left/:$(git rev-parse topic:left)
184+
TREE:left/:$(git rev-parse base~1:left):UNINTERESTING
185185
TREE:right/:$(git rev-parse topic:right)
186186
trees:3
187-
BLOB:a:$(git rev-parse topic:a)
188-
BLOB:left/b:$(git rev-parse topic:left/b)
187+
BLOB:a:$(git rev-parse base~1:a):UNINTERESTING
188+
BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
189189
BLOB:right/c:$(git rev-parse topic:right/c)
190-
BLOB:right/d:$(git rev-parse topic:right/d)
190+
BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
191191
blobs:4
192192
tags:0
193193
EOF
@@ -205,10 +205,10 @@ test_expect_success 'topic, not base, only blobs' '
205205
cat >expect <<-EOF &&
206206
commits:0
207207
trees:0
208-
BLOB:a:$(git rev-parse topic:a)
209-
BLOB:left/b:$(git rev-parse topic:left/b)
208+
BLOB:a:$(git rev-parse base~1:a):UNINTERESTING
209+
BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
210210
BLOB:right/c:$(git rev-parse topic:right/c)
211-
BLOB:right/d:$(git rev-parse topic:right/d)
211+
BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
212212
blobs:4
213213
tags:0
214214
EOF
@@ -246,7 +246,7 @@ test_expect_success 'topic, not base, only trees' '
246246
cat >expect <<-EOF &&
247247
commits:0
248248
TREE::$(git rev-parse topic^{tree})
249-
TREE:left/:$(git rev-parse topic:left)
249+
TREE:left/:$(git rev-parse base~1:left):UNINTERESTING
250250
TREE:right/:$(git rev-parse topic:right)
251251
trees:3
252252
blobs:0

0 commit comments

Comments
 (0)