-
Notifications
You must be signed in to change notification settings - Fork 2.7k
[WORK-IN-PROGRESS] Introduce the path walk API into Git for Windows #5146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
6354d7a
c8e08c3
b05a276
e02f7b3
4236e4f
31c9b45
356abc9
3a421ff
b9471b6
c4b3490
ca37a49
0a20a17
91c4d57
53632be
c63928e
3e9b671
af7d53f
d192ae7
5f7e131
ab0bc08
bd8b5b5
c6d4832
c2092f0
bbc57f7
32fca07
c145b9e
72191a0
5039f03
e43582c
88fee5b
d17e503
d7e7283
98a5786
9d0690a
556335a
69aa8d8
5001883
3ab1bda
84c8a06
16cd9a3
c8f1239
b5c2265
fee8f88
489ce0c
9b78d40
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
/* | ||
* path-walk.c: implementation for path-based walks of the object graph. | ||
*/ | ||
#include "git-compat-util.h" | ||
#include "path-walk.h" | ||
#include "blob.h" | ||
#include "commit.h" | ||
#include "dir.h" | ||
#include "hashmap.h" | ||
#include "hex.h" | ||
#include "object.h" | ||
#include "oid-array.h" | ||
#include "revision.h" | ||
#include "string-list.h" | ||
#include "strmap.h" | ||
#include "trace2.h" | ||
#include "tree.h" | ||
#include "tree-walk.h" | ||
|
||
struct type_and_oid_list | ||
{ | ||
enum object_type type; | ||
struct oid_array oids; | ||
}; | ||
|
||
#define TYPE_AND_OID_LIST_INIT { \ | ||
.type = OBJ_NONE, \ | ||
.oids = OID_ARRAY_INIT \ | ||
} | ||
|
||
struct path_walk_context { | ||
/** | ||
* Repeats of data in 'struct path_walk_info' for | ||
* access with fewer characters. | ||
*/ | ||
struct repository *repo; | ||
struct rev_info *revs; | ||
struct path_walk_info *info; | ||
|
||
/** | ||
* Map a path to a 'struct type_and_oid_list' | ||
* containing the objects discovered at that | ||
* path. | ||
*/ | ||
struct strmap paths_to_lists; | ||
|
||
/** | ||
* Store the current list of paths in a stack, to | ||
* facilitate depth-first-search without recursion. | ||
*/ | ||
struct string_list path_stack; | ||
}; | ||
|
||
static int add_children(struct path_walk_context *ctx, | ||
const char *base_path, | ||
struct object_id *oid) | ||
{ | ||
struct tree_desc desc; | ||
struct name_entry entry; | ||
struct strbuf path = STRBUF_INIT; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For performance reasons, it might make sense to pass in |
||
size_t base_len; | ||
struct tree *tree = lookup_tree(ctx->repo, oid); | ||
|
||
if (!tree) { | ||
error(_("failed to walk children of tree %s: not found"), | ||
oid_to_hex(oid)); | ||
return -1; | ||
} | ||
|
||
strbuf_addstr(&path, base_path); | ||
base_len = path.len; | ||
|
||
parse_tree(tree); | ||
init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size); | ||
while (tree_entry(&desc, &entry)) { | ||
struct type_and_oid_list *list; | ||
struct object *o; | ||
/* Not actually true, but we will ignore submodules later. */ | ||
dscho marked this conversation as resolved.
Show resolved
Hide resolved
|
||
enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB; | ||
|
||
/* Skip submodules. */ | ||
if (S_ISGITLINK(entry.mode)) | ||
continue; | ||
|
||
if (type == OBJ_TREE) { | ||
struct tree *child = lookup_tree(ctx->repo, &entry.oid); | ||
o = child ? &child->object : NULL; | ||
} else if (type == OBJ_BLOB) { | ||
struct blob *child = lookup_blob(ctx->repo, &entry.oid); | ||
o = child ? &child->object : NULL; | ||
} else { | ||
/* Wrong type? */ | ||
continue; | ||
} | ||
|
||
if (!o) /* report error?*/ | ||
continue; | ||
|
||
/* Skip this object if already seen. */ | ||
if (o->flags & SEEN) | ||
continue; | ||
o->flags |= SEEN; | ||
|
||
strbuf_setlen(&path, base_len); | ||
strbuf_add(&path, entry.path, entry.pathlen); | ||
|
||
/* | ||
* Trees will end with "/" for concatenation and distinction | ||
* from blobs at the same path. | ||
*/ | ||
if (type == OBJ_TREE) | ||
strbuf_addch(&path, '/'); | ||
|
||
if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) { | ||
CALLOC_ARRAY(list, 1); | ||
list->type = type; | ||
strmap_put(&ctx->paths_to_lists, path.buf, list); | ||
string_list_append(&ctx->path_stack, path.buf); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We might want to add this, out of an abundance of caution:
|
||
oid_array_append(&list->oids, &entry.oid); | ||
} | ||
|
||
free_tree_buffer(tree); | ||
strbuf_release(&path); | ||
return 0; | ||
} | ||
|
||
/* | ||
* For each path in paths_to_explore, walk the trees another level | ||
* and add any found blobs to the batch (but only if they don't | ||
* exist and haven't been added yet). | ||
*/ | ||
static int walk_path(struct path_walk_context *ctx, | ||
const char *path) | ||
{ | ||
struct type_and_oid_list *list; | ||
int ret = 0; | ||
|
||
list = strmap_get(&ctx->paths_to_lists, path); | ||
|
||
/* Evaluate function pointer on this data. */ | ||
ret = ctx->info->path_fn(path, &list->oids, list->type, | ||
ctx->info->path_fn_data); | ||
|
||
/* Expand data for children. */ | ||
if (list->type == OBJ_TREE) { | ||
for (size_t i = 0; i < list->oids.nr; i++) { | ||
ret |= add_children(ctx, | ||
Comment on lines
+210
to
+211
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Continuing on errors may be the right thing to do always. On the off-chance that a user may want to return early on error, we may want to add a flag for that. |
||
path, | ||
&list->oids.oid[i]); | ||
} | ||
} | ||
|
||
oid_array_clear(&list->oids); | ||
strmap_remove(&ctx->paths_to_lists, path, 1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder whether this is safe. If the same
where There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is important to realize that a path appears at most once. The walk goes like this:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, the crucial part I was missing is that we first walk all commits, build up the huge, giant list for "" (i.e. all the top-level trees). Then we iterate over all of those before going any further. Crucially, no single tree walk happens until every last of the commits has been walked. For some reason, I got the impression that we're doing a depth-first search where we immediately iterate into the full tree of the first commit, until we exhausted all of the blobs reachable from that root tree, and only then walk to the next commit. But that's not what we do ;-) We basically walk all the paths in a depth-first manner, but not in alphabetical order because we end up plucking off the paths from the top of the stack (i.e. the "alphabetically last" item seen so far, but that only holds true if the root tree has no child trees, otherwise the order gets messy real quickly). So the order is somewhat jumbled, I guess ;-) Thank you for explaining this so patiently. I think my confusion might be a good motivation to add a little primer about the order to the commit message of the "path-walk: introduce an object walk by path" commit. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was working on a technical doc for the API but don't have it committed or pushed anywhere. This confirms that it is necessary. |
||
return ret; | ||
} | ||
|
||
static void clear_strmap(struct strmap *map) | ||
{ | ||
struct hashmap_iter iter; | ||
struct strmap_entry *e; | ||
|
||
hashmap_for_each_entry(&map->map, &iter, e, ent) { | ||
struct type_and_oid_list *list = e->value; | ||
oid_array_clear(&list->oids); | ||
} | ||
strmap_clear(map, 1); | ||
strmap_init(map); | ||
} | ||
|
||
/** | ||
* Given the configuration of 'info', walk the commits based on 'info->revs' and | ||
* call 'info->path_fn' on each discovered path. | ||
* | ||
* Returns nonzero on an error. | ||
*/ | ||
int walk_objects_by_path(struct path_walk_info *info) | ||
{ | ||
const char *root_path = ""; | ||
int ret = 0; | ||
size_t commits_nr = 0, paths_nr = 0; | ||
struct commit *c; | ||
struct type_and_oid_list *root_tree_list; | ||
struct path_walk_context ctx = { | ||
.repo = info->revs->repo, | ||
.revs = info->revs, | ||
.info = info, | ||
.path_stack = STRING_LIST_INIT_DUP, | ||
.paths_to_lists = STRMAP_INIT | ||
}; | ||
|
||
trace2_region_enter("path-walk", "commit-walk", info->revs->repo); | ||
|
||
/* Insert a single list for the root tree into the paths. */ | ||
CALLOC_ARRAY(root_tree_list, 1); | ||
root_tree_list->type = OBJ_TREE; | ||
strmap_put(&ctx.paths_to_lists, root_path, root_tree_list); | ||
|
||
if (prepare_revision_walk(info->revs)) | ||
die(_("failed to setup revision walk")); | ||
|
||
while ((c = get_revision(info->revs))) { | ||
struct object_id *oid = get_commit_tree_oid(c); | ||
struct tree *t = lookup_tree(info->revs->repo, oid); | ||
commits_nr++; | ||
|
||
if (t) | ||
oid_array_append(&root_tree_list->oids, oid); | ||
else | ||
warning("could not find tree %s", oid_to_hex(oid)); | ||
} | ||
|
||
trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr); | ||
trace2_region_leave("path-walk", "commit-walk", info->revs->repo); | ||
|
||
string_list_append(&ctx.path_stack, root_path); | ||
|
||
trace2_region_enter("path-walk", "path-walk", info->revs->repo); | ||
while (!ret && ctx.path_stack.nr) { | ||
char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string; | ||
ctx.path_stack.nr--; | ||
paths_nr++; | ||
|
||
ret = walk_path(&ctx, path); | ||
|
||
free(path); | ||
} | ||
trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr); | ||
trace2_region_leave("path-walk", "path-walk", info->revs->repo); | ||
|
||
clear_strmap(&ctx.paths_to_lists); | ||
string_list_clear(&ctx.path_stack, 0); | ||
return ret; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* path-walk.h : Methods and structures for walking the object graph in batches | ||
* by the paths that can reach those objects. | ||
*/ | ||
#include "object.h" /* Required for 'enum object_type'. */ | ||
|
||
struct rev_info; | ||
struct oid_array; | ||
|
||
/** | ||
* The type of a function pointer for the method that is called on a list of | ||
* objects reachable at a given path. | ||
*/ | ||
typedef int (*path_fn)(const char *path, | ||
struct oid_array *oids, | ||
enum object_type type, | ||
void *data); | ||
|
||
struct path_walk_info { | ||
/** | ||
* revs provides the definitions for the commit walk, including | ||
* which commits are UNINTERESTING or not. | ||
*/ | ||
struct rev_info *revs; | ||
|
||
/** | ||
* The caller wishes to execute custom logic on objects reachable at a | ||
* given path. Every reachable object will be visited exactly once, and | ||
* the first path to see an object wins. This may not be a stable choice. | ||
*/ | ||
path_fn path_fn; | ||
void *path_fn_data; | ||
}; | ||
|
||
#define PATH_WALK_INFO_INIT { 0 } | ||
|
||
/** | ||
* Given the configuration of 'info', walk the commits based on 'info->revs' and | ||
* call 'info->path_fn' on each discovered path. | ||
* | ||
* Returns nonzero on an error. | ||
*/ | ||
int walk_objects_by_path(struct path_walk_info *info); |
Uh oh!
There was an error while loading. Please reload this page.