diff --git a/Documentation/config.txt b/Documentation/config.txt index 1ac0ae6adb0460..e528210c67a4c6 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -3268,6 +3268,10 @@ uploadpack.packObjectsHook:: was run. I.e., `upload-pack` will feed input intended for `pack-objects` to the hook, and expects a completed packfile on stdout. + +uploadpack.allowFilter:: + If this option is set, `upload-pack` will advertise partial + clone and partial fetch object filtering. + Note that this configuration variable is ignored if it is seen in the repository-level config (this is a safety measure against fetching from diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index 473a16135abf86..949d021a4cce7d 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -12,7 +12,8 @@ SYNOPSIS 'git pack-objects' [-q | --progress | --all-progress] [--all-progress-implied] [--no-reuse-delta] [--delta-base-offset] [--non-empty] [--local] [--incremental] [--window=] [--depth=] - [--revs [--unpacked | --all]] [--stdout | base-name] + [--revs [--unpacked | --all]] + [--stdout [--filter=] | base-name] [--shallow] [--keep-true-parents] < object-list @@ -236,6 +237,15 @@ So does `git bundle` (see linkgit:git-bundle[1]) when it creates a bundle. With this option, parents that are hidden by grafts are packed nevertheless. +--filter=:: + Requires `--stdout`. Omits certain objects (usually blobs) from + the resulting packfile. See linkgit:git-rev-list[1] for valid + `` forms. + +--exclude-promisor-objects:: + Silently omit referenced but missing objects from the packfile. + This is used with partial clone. + SEE ALSO -------- linkgit:git-rev-list[1] diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt index ef22f1775b6348..6d2e60dab34fe3 100644 --- a/Documentation/git-rev-list.txt +++ b/Documentation/git-rev-list.txt @@ -47,7 +47,10 @@ SYNOPSIS [ --fixed-strings | -F ] [ --date=] [ [ --objects | --objects-edge | --objects-edge-aggressive ] - [ --unpacked ] ] + [ --unpacked ] + [ --filter= ] ] + [ --filter-print-missing ] + [ --filter-print-omitted ] [ --pretty | --header ] [ --bisect ] [ --bisect-vars ] diff --git a/Documentation/gitremote-helpers.txt b/Documentation/gitremote-helpers.txt index 4a584f3c5d7e40..bdfec8fd30e7b8 100644 --- a/Documentation/gitremote-helpers.txt +++ b/Documentation/gitremote-helpers.txt @@ -466,6 +466,12 @@ set by Git if the remote helper has the 'option' capability. Transmit as a push option. As the push option must not contain LF or NUL characters, the string is not encoded. +'option filter ':: + An object filter specification for partial clone or fetch + as described in rev-list. + +TODO document 'option from-promisor' and 'option no-haves' ? + SEE ALSO -------- linkgit:git-remote[1] diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 13501e1556e25b..414b60c83154ed 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -706,6 +706,36 @@ ifdef::git-rev-list[] --unpacked:: Only useful with `--objects`; print the object IDs that are not in packs. + +--filter=:: + Only useful with one of the `--objects*`; omits objects (usually + blobs) from the list of printed objects. The '' + may be one of the following: ++ +The form '--filter=blob:none' omits all blobs. ++ +The form '--filter=blob:limit=[kmg]' omits blobs larger than n bytes +or units. The value may be zero. Special files matching '.git*' are +alwayse included, regardless of size. ++ +The form '--filter=sparse:oid=' uses a sparse-checkout +specification contained in the object (or the object that the expression +evaluates to) to omit blobs not required by the corresponding sparse +checkout. ++ +The form '--filter=sparse:path=' similarly uses a sparse-checkout +specification contained in . + +--filter-print-missing:: + Prints a list of the missing objects for the requested traversal. + Object IDs are prefixed with a ``?'' character. The object type + is printed after the ID. This may be used with or without any of + the above filtering options. + +--filter-print-omitted:: + Only useful with one of the above `--filter*`; prints a list + of the omitted objects. Object IDs are prefixed with a ``~'' + character. endif::git-rev-list[] --no-walk[=(sorted|unsorted)]:: diff --git a/Documentation/technical/pack-protocol.txt b/Documentation/technical/pack-protocol.txt index ed1eae8b83a651..a43a113e44f3d9 100644 --- a/Documentation/technical/pack-protocol.txt +++ b/Documentation/technical/pack-protocol.txt @@ -212,6 +212,7 @@ out of what the server said it could do with the first 'want' line. upload-request = want-list *shallow-line *1depth-request + [filter-request] flush-pkt want-list = first-want @@ -227,6 +228,8 @@ out of what the server said it could do with the first 'want' line. additional-want = PKT-LINE("want" SP obj-id) depth = 1*DIGIT + + filter-request = PKT-LINE("filter" SP filter-spec) ---- Clients MUST send all the obj-ids it wants from the reference @@ -249,6 +252,11 @@ complete those commits. Commits whose parents are not received as a result are defined as shallow and marked as such in the server. This information is sent back to the client in the next step. +The client can optionally request that pack-objects omit various +objects from the packfile using one of several filtering techniques. +These are intended for use with partial clone and partial fetch +operations. See `rev-list` for possible "filter-spec" values. + Once all the 'want's and 'shallow's (and optional 'deepen') are transferred, clients MUST send a flush-pkt, to tell the server side that it is done sending the list. diff --git a/Documentation/technical/protocol-capabilities.txt b/Documentation/technical/protocol-capabilities.txt index 26dcc6f502020d..332d209b58ca42 100644 --- a/Documentation/technical/protocol-capabilities.txt +++ b/Documentation/technical/protocol-capabilities.txt @@ -309,3 +309,11 @@ to accept a signed push certificate, and asks the to be included in the push certificate. A send-pack client MUST NOT send a push-cert packet unless the receive-pack server advertises this capability. + +filter +------ + +If the upload-pack server advertises the 'filter' capability, +fetch-pack may send "filter" commands to request a partial clone +or partial fetch and request that the server omit various objects +from the packfile. diff --git a/Documentation/technical/repository-version.txt b/Documentation/technical/repository-version.txt index 00ad37986efdce..9d488dbbcade4d 100644 --- a/Documentation/technical/repository-version.txt +++ b/Documentation/technical/repository-version.txt @@ -86,3 +86,25 @@ for testing format-1 compatibility. When the config key `extensions.preciousObjects` is set to `true`, objects in the repository MUST NOT be deleted (e.g., by `git-prune` or `git repack -d`). + +`partialcloneremote` +~~~~~~~~~~~~~~~~~~~~ + +When the config key `extensions.partialcloneremote` is set, it indicates +that the repo was created with a partial clone (or later performed +a partial fetch) and that the remote may have omitted sending +certain unwanted objects. Such a remote is called a "promisor remote" +and it promises that all such omitted objects can be fetched from it +in the future. + +The value of this key is the name of the promisor remote. + +`partialclonefilter` +~~~~~~~~~~~~~~~~~~~~ + +When the config key `extensions.partialclonefilter` is set, it gives +the initial filter expression used to create the partial clone. +This value becomed the default filter expression for subsequent +fetches (called "partial fetches") from the promisor remote. This +value may also be set by the first explicit partial fetch following a +normal clone. diff --git a/Makefile b/Makefile index cd75985991f453..7a0679ab2d8720 100644 --- a/Makefile +++ b/Makefile @@ -792,6 +792,7 @@ LIB_OBJS += ewah/ewah_bitmap.o LIB_OBJS += ewah/ewah_io.o LIB_OBJS += ewah/ewah_rlw.o LIB_OBJS += exec_cmd.o +LIB_OBJS += fetch-object.o LIB_OBJS += fetch-pack.o LIB_OBJS += fsck.o LIB_OBJS += gettext.o @@ -807,6 +808,8 @@ LIB_OBJS += levenshtein.o LIB_OBJS += line-log.o LIB_OBJS += line-range.o LIB_OBJS += list-objects.o +LIB_OBJS += list-objects-filter.o +LIB_OBJS += list-objects-filter-options.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o LIB_OBJS += log-tree.o @@ -836,6 +839,7 @@ LIB_OBJS += pack-write.o LIB_OBJS += pager.o LIB_OBJS += parse-options.o LIB_OBJS += parse-options-cb.o +LIB_OBJS += partial-clone-utils.o LIB_OBJS += patch-delta.o LIB_OBJS += patch-ids.o LIB_OBJS += path.o diff --git a/builtin/cat-file.c b/builtin/cat-file.c index f5fa4fd75af26a..ba77b733d6a206 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -13,6 +13,7 @@ #include "tree-walk.h" #include "sha1-array.h" #include "packfile.h" +#include "partial-clone-utils.h" struct batch_options { int enabled; @@ -475,6 +476,8 @@ static int batch_objects(struct batch_options *opt) for_each_loose_object(batch_loose_object, &sa, 0); for_each_packed_object(batch_packed_object, &sa, 0); + if (is_partial_clone_registered()) + warning("This repository has partial clone enabled. Some objects may not be loaded."); cb.opt = opt; cb.expand = &data; diff --git a/builtin/clone.c b/builtin/clone.c index dbddd98f80d666..08315d81189bae 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -26,6 +26,8 @@ #include "run-command.h" #include "connected.h" #include "packfile.h" +#include "list-objects-filter-options.h" +#include "partial-clone-utils.h" /* * Overall FIXMEs: @@ -60,6 +62,7 @@ static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP; static int option_dissociate; static int max_jobs = -1; static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP; +static struct list_objects_filter_options filter_options; static int recurse_submodules_cb(const struct option *opt, const char *arg, int unset) @@ -135,6 +138,7 @@ static struct option builtin_clone_options[] = { TRANSPORT_FAMILY_IPV4), OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; @@ -886,6 +890,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) struct refspec *refspec; const char *fetch_pattern; + fetch_if_missing = 0; + packet_trace_identity("clone"); argc = parse_options(argc, argv, prefix, builtin_clone_options, builtin_clone_usage, 0); @@ -1073,6 +1079,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) warning(_("--shallow-since is ignored in local clones; use file:// instead.")); if (option_not.nr) warning(_("--shallow-exclude is ignored in local clones; use file:// instead.")); + if (filter_options.choice) + warning(_("--filter is ignored in local clones; use file:// instead.")); if (!access(mkpath("%s/shallow", path), F_OK)) { if (option_local > 0) warning(_("source repository is shallow, ignoring --local")); @@ -1104,7 +1112,13 @@ int cmd_clone(int argc, const char **argv, const char *prefix) transport_set_option(transport, TRANS_OPT_UPLOADPACK, option_upload_pack); - if (transport->smart_options && !deepen) + if (filter_options.choice) { + transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, + filter_options.raw_value); + transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); + } + + if (transport->smart_options && !deepen && !filter_options.choice) transport->smart_options->check_self_contained_and_connected = 1; refs = transport_get_remote_refs(transport); @@ -1164,13 +1178,18 @@ int cmd_clone(int argc, const char **argv, const char *prefix) write_refspec_config(src_ref_prefix, our_head_points_at, remote_head_points_at, &branch_top); + if (filter_options.choice) + partial_clone_utils_register(&filter_options, "origin", + "clone"); + if (is_local) clone_local(path, git_dir); else if (refs && complete_refs_before_fetch) transport_fetch_refs(transport, mapped_refs); update_remote_refs(refs, mapped_refs, remote_head_points_at, - branch_top.buf, reflog_msg.buf, transport, !is_local); + branch_top.buf, reflog_msg.buf, transport, + !is_local && !filter_options.choice); update_head(our_head_points_at, remote_head, reflog_msg.buf); @@ -1191,6 +1210,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } junk_mode = JUNK_LEAVE_REPO; + fetch_if_missing = 1; err = checkout(submodule_progress); strbuf_release(&reflog_msg); diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 366b9d13f929b7..9ca04203f8e66c 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -53,6 +53,8 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) struct oid_array shallow = OID_ARRAY_INIT; struct string_list deepen_not = STRING_LIST_INIT_DUP; + fetch_if_missing = 0; + packet_trace_identity("fetch-pack"); memset(&args, 0, sizeof(args)); @@ -143,6 +145,18 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) args.update_shallow = 1; continue; } + if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { + parse_list_objects_filter(&args.filter_options, arg); + continue; + } + if (!strcmp("--from-promisor", arg)) { + args.from_promisor = 1; + continue; + } + if (!strcmp("--no-haves", arg)) { + args.no_haves = 1; + continue; + } usage(fetch_pack_usage); } if (deepen_not.nr) diff --git a/builtin/fetch.c b/builtin/fetch.c index 225c734924f148..ab53df319186db 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -18,6 +18,8 @@ #include "argv-array.h" #include "utf8.h" #include "packfile.h" +#include "list-objects-filter-options.h" +#include "partial-clone-utils.h" static const char * const builtin_fetch_usage[] = { N_("git fetch [] [ [...]]"), @@ -55,6 +57,7 @@ static int recurse_submodules_default = RECURSE_SUBMODULES_ON_DEMAND; static int shown_url = 0; static int refmap_alloc, refmap_nr; static const char **refmap_array; +static struct list_objects_filter_options filter_options; static int git_fetch_config(const char *k, const char *v, void *cb) { @@ -160,6 +163,7 @@ static struct option builtin_fetch_options[] = { TRANSPORT_FAMILY_IPV4), OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; @@ -754,6 +758,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, const char *filename = dry_run ? "/dev/null" : git_path_fetch_head(); int want_status; int summary_width = transport_summary_width(ref_map); + struct check_connected_options opt = CHECK_CONNECTED_INIT; fp = fopen(filename, "a"); if (!fp) @@ -765,7 +770,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, url = xstrdup("foreign"); rm = ref_map; - if (check_connected(iterate_ref_map, &rm, NULL)) { + if (check_connected(iterate_ref_map, &rm, &opt)) { rc = error(_("%s did not send all necessary objects\n"), url); goto abort; } @@ -1044,6 +1049,11 @@ static struct transport *prepare_transport(struct remote *remote, int deepen) set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes"); if (update_shallow) set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes"); + if (filter_options.choice) { + set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, + filter_options.raw_value); + set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); + } return transport; } @@ -1242,6 +1252,20 @@ static int fetch_multiple(struct string_list *list) int i, result = 0; struct argv_array argv = ARGV_ARRAY_INIT; + if (filter_options.choice) { + /* + * We currently only support partial-fetches to the remote + * used for the partial-clone because we only support 1 + * promisor remote, so we DO NOT allow explicit command + * line filter arguments. + * + * Note that the loop below will spawn background fetches + * for each remote and one of them MAY INHERIT the proper + * partial-fetch settings, so everything is consistent. + */ + die(_("partial-fetch is not supported on multiple remotes")); + } + if (!append && !dry_run) { int errcode = truncate_fetch_head(); if (errcode) @@ -1267,6 +1291,44 @@ static int fetch_multiple(struct string_list *list) return result; } +static inline void partial_fetch_one_setup(struct remote *remote) +{ + if (filter_options.choice) { + /* + * A partial-fetch was explicitly requested. + * + * If this is the first partial-* command on + * this repo, we must register the partial + * settings in the repository extension. + * + * If this follows a previous partial-* command + * we must ensure the args are consistent with + * the existing registration (because we don't + * currently support mixing-and-matching). + */ + partial_clone_utils_register(&filter_options, + remote->name, "fetch"); + return; + } + + if (is_partial_clone_registered() && + !strcmp(remote->name, repository_format_partial_clone_remote)) { + /* + * If a partial-* command has already been used on + * this repo and it was to this remote, we should + * inherit the filter settings used previously. + * That is, if clone omitted very large blobs, then + * fetch should too. + * + * Use the cached filter-spec and create the filter + * settings. + */ + parse_list_objects_filter( + &filter_options, + repository_format_partial_clone_filter); + } +} + static int fetch_one(struct remote *remote, int argc, const char **argv) { static const char **refs = NULL; @@ -1278,6 +1340,9 @@ static int fetch_one(struct remote *remote, int argc, const char **argv) die(_("No remote repository specified. Please, specify either a URL or a\n" "remote name from which new revisions should be fetched.")); + partial_fetch_one_setup(remote); + + gtransport = prepare_transport(remote, 1); if (prune < 0) { @@ -1322,12 +1387,14 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) { int i; struct string_list list = STRING_LIST_INIT_DUP; - struct remote *remote; + struct remote *remote = NULL; int result = 0; struct argv_array argv_gc_auto = ARGV_ARRAY_INIT; packet_trace_identity("fetch"); + fetch_if_missing = 0; + /* Record the command line for the reflog */ strbuf_addstr(&default_rla, "fetch"); for (i = 1; i < argc; i++) @@ -1367,17 +1434,14 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) else if (argc > 1) die(_("fetch --all does not make sense with refspecs")); (void) for_each_remote(get_one_remote_for_fetch, &list); - result = fetch_multiple(&list); } else if (argc == 0) { /* No arguments -- use default remote */ remote = remote_get(NULL); - result = fetch_one(remote, argc, argv); } else if (multiple) { /* All arguments are assumed to be remotes or groups */ for (i = 0; i < argc; i++) if (!add_remote_or_group(argv[i], &list)) die(_("No such remote or remote group: %s"), argv[i]); - result = fetch_multiple(&list); } else { /* Single remote or group */ (void) add_remote_or_group(argv[0], &list); @@ -1385,14 +1449,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) /* More than one remote */ if (argc > 1) die(_("Fetching a group and specifying refspecs does not make sense")); - result = fetch_multiple(&list); } else { /* Zero or one remotes */ remote = remote_get(argv[0]); - result = fetch_one(remote, argc-1, argv+1); + argc--; + argv++; } } + if (remote) + result = fetch_one(remote, argc, argv); + else + result = fetch_multiple(&list); + if (!result && (recurse_submodules != RECURSE_SUBMODULES_OFF)) { struct argv_array options = ARGV_ARRAY_INIT; diff --git a/builtin/fsck.c b/builtin/fsck.c index 56afe405b8072b..3b76c0ef0f4bed 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -149,6 +149,15 @@ static int mark_object(struct object *obj, int type, void *data, struct fsck_opt if (obj->flags & REACHABLE) return 0; obj->flags |= REACHABLE; + + if (is_promisor_object(&obj->oid)) + /* + * Further recursion does not need to be performed on this + * object since it is a promisor object (so it does not need to + * be added to "pending"). + */ + return 0; + if (!(obj->flags & HAS_OBJ)) { if (parent && !has_object_file(&obj->oid)) { printf("broken link from %7s %s\n", @@ -208,6 +217,8 @@ static void check_reachable_object(struct object *obj) * do a full fsck */ if (!(obj->flags & HAS_OBJ)) { + if (is_promisor_object(&obj->oid)) + return; if (has_sha1_pack(obj->oid.hash)) return; /* it is in pack - forget about it */ printf("missing %s %s\n", printable_type(obj), @@ -398,7 +409,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid, xstrfmt("%s@{%"PRItime"}", refname, timestamp)); obj->flags |= USED; mark_object_reachable(obj); - } else { + } else if (!is_promisor_object(oid)) { error("%s: invalid reflog entry %s", refname, oid_to_hex(oid)); errors_found |= ERROR_REACHABLE; } @@ -434,6 +445,14 @@ static int fsck_handle_ref(const char *refname, const struct object_id *oid, obj = parse_object(oid); if (!obj) { + if (is_promisor_object(oid)) { + /* + * Increment default_refs anyway, because this is a + * valid ref. + */ + default_refs++; + return 0; + } error("%s: invalid sha1 pointer %s", refname, oid_to_hex(oid)); errors_found |= ERROR_REACHABLE; /* We'll continue with the rest despite the error.. */ @@ -659,6 +678,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) int i; struct alternate_object_database *alt; + /* fsck knows how to handle missing promisor objects */ + fetch_if_missing = 0; + errors_found = 0; check_replace_refs = 0; @@ -731,6 +753,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) struct object *obj = lookup_object(oid.hash); if (!obj || !(obj->flags & HAS_OBJ)) { + if (is_promisor_object(&oid)) + continue; error("%s: object missing", oid_to_hex(&oid)); errors_found |= ERROR_OBJECT; continue; diff --git a/builtin/gc.c b/builtin/gc.c index 3c5eae0edf12e4..a17806add37279 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -20,6 +20,7 @@ #include "argv-array.h" #include "commit.h" #include "packfile.h" +#include "partial-clone-utils.h" #define FAILED_RUN "failed to run %s" @@ -458,6 +459,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix) argv_array_push(&prune, prune_expire); if (quiet) argv_array_push(&prune, "--no-progress"); + if (is_partial_clone_registered()) + argv_array_push(&prune, + "--exclude-promisor-objects"); if (run_command_v_opt(prune.argv, RUN_GIT_CMD)) return error(FAILED_RUN, prune.argv[0]); } diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 8ec459f5225228..51693dcf4fd8dd 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -82,6 +82,7 @@ static int verbose; static int show_resolving_progress; static int show_stat; static int check_self_contained_and_connected; +static int arg_promisor_given; static struct progress *progress; @@ -222,6 +223,17 @@ static unsigned check_object(struct object *obj) if (!(obj->flags & FLAG_CHECKED)) { unsigned long size; int type = sha1_object_info(obj->oid.hash, &size); + + if (type <= 0 && arg_promisor_given) { + /* + * Assume this missing object is promised. We can't + * confirm it because we are indexing the packfile + * that omitted it. + */ + obj->flags |= FLAG_CHECKED; + return 0; + } + if (type <= 0) die(_("did not receive expected object %s"), oid_to_hex(&obj->oid)); @@ -1389,15 +1401,60 @@ static void fix_unresolved_deltas(struct sha1file *f) free(sorted_by_pos); } +static const char *derive_filename(const char *pack_name, const char *suffix, + struct strbuf *buf) +{ + size_t len; + if (!strip_suffix(pack_name, ".pack", &len)) + die(_("packfile name '%s' does not end with '.pack'"), + pack_name); + strbuf_add(buf, pack_name, len); + strbuf_addch(buf, '.'); + strbuf_addstr(buf, suffix); + return buf->buf; +} + +static void write_special_file(const char *suffix, const char *msg, + const char *pack_name, const unsigned char *sha1, + const char **report) +{ + struct strbuf name_buf = STRBUF_INIT; + const char *filename; + int fd; + int msg_len = strlen(msg); + + if (pack_name) + filename = derive_filename(pack_name, suffix, &name_buf); + else + filename = odb_pack_name(&name_buf, sha1, suffix); + + fd = odb_pack_keep(filename); + if (fd < 0) { + if (errno != EEXIST) + die_errno(_("cannot write %s file '%s'"), + suffix, filename); + } else { + if (msg_len > 0) { + write_or_die(fd, msg, msg_len); + write_or_die(fd, "\n", 1); + } + if (close(fd) != 0) + die_errno(_("cannot close written %s file '%s'"), + suffix, filename); + if (report) + *report = suffix; + } + strbuf_release(&name_buf); +} + static void final(const char *final_pack_name, const char *curr_pack_name, const char *final_index_name, const char *curr_index_name, - const char *keep_name, const char *keep_msg, + const char *keep_msg, const char *promisor_msg, unsigned char *sha1) { const char *report = "pack"; struct strbuf pack_name = STRBUF_INIT; struct strbuf index_name = STRBUF_INIT; - struct strbuf keep_name_buf = STRBUF_INIT; int err; if (!from_stdin) { @@ -1409,28 +1466,12 @@ static void final(const char *final_pack_name, const char *curr_pack_name, die_errno(_("error while closing pack file")); } - if (keep_msg) { - int keep_fd, keep_msg_len = strlen(keep_msg); - - if (!keep_name) - keep_name = odb_pack_name(&keep_name_buf, sha1, "keep"); - - keep_fd = odb_pack_keep(keep_name); - if (keep_fd < 0) { - if (errno != EEXIST) - die_errno(_("cannot write keep file '%s'"), - keep_name); - } else { - if (keep_msg_len > 0) { - write_or_die(keep_fd, keep_msg, keep_msg_len); - write_or_die(keep_fd, "\n", 1); - } - if (close(keep_fd) != 0) - die_errno(_("cannot close written keep file '%s'"), - keep_name); - report = "keep"; - } - } + if (keep_msg) + write_special_file("keep", keep_msg, final_pack_name, sha1, + &report); + if (promisor_msg) + write_special_file("promisor", promisor_msg, final_pack_name, + sha1, NULL); if (final_pack_name != curr_pack_name) { if (!final_pack_name) @@ -1472,7 +1513,6 @@ static void final(const char *final_pack_name, const char *curr_pack_name, strbuf_release(&index_name); strbuf_release(&pack_name); - strbuf_release(&keep_name_buf); } static int git_index_pack_config(const char *k, const char *v, void *cb) @@ -1615,32 +1655,26 @@ static void show_pack_info(int stat_only) } } -static const char *derive_filename(const char *pack_name, const char *suffix, - struct strbuf *buf) -{ - size_t len; - if (!strip_suffix(pack_name, ".pack", &len)) - die(_("packfile name '%s' does not end with '.pack'"), - pack_name); - strbuf_add(buf, pack_name, len); - strbuf_addstr(buf, suffix); - return buf->buf; -} - int cmd_index_pack(int argc, const char **argv, const char *prefix) { int i, fix_thin_pack = 0, verify = 0, stat_only = 0; const char *curr_index; const char *index_name = NULL, *pack_name = NULL; - const char *keep_name = NULL, *keep_msg = NULL; - struct strbuf index_name_buf = STRBUF_INIT, - keep_name_buf = STRBUF_INIT; + const char *keep_msg = NULL; + const char *promisor_msg = NULL; + struct strbuf index_name_buf = STRBUF_INIT; struct pack_idx_entry **idx_objects; struct pack_idx_option opts; unsigned char pack_sha1[20]; unsigned foreign_nr = 1; /* zero is a "good" value, assume bad */ int report_end_of_input = 0; + /* + * index-pack never needs to fetch missing objects, since it only + * accesses the repo to do hash collision checks + */ + fetch_if_missing = 0; + if (argc == 2 && !strcmp(argv[1], "-h")) usage(index_pack_usage); @@ -1683,6 +1717,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) keep_msg = ""; } else if (starts_with(arg, "--keep=")) { keep_msg = arg + 7; + } else if (!strcmp(arg, "--promisor")) { + promisor_msg = ""; + arg_promisor_given = 1; + } else if (starts_with(arg, "--promisor=")) { + promisor_msg = arg + strlen("--promisor="); + arg_promisor_given = 1; } else if (starts_with(arg, "--threads=")) { char *end; nr_threads = strtoul(arg+10, &end, 0); @@ -1745,9 +1785,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (from_stdin && !startup_info->have_repository) die(_("--stdin requires a git repository")); if (!index_name && pack_name) - index_name = derive_filename(pack_name, ".idx", &index_name_buf); - if (keep_msg && !keep_name && pack_name) - keep_name = derive_filename(pack_name, ".keep", &keep_name_buf); + index_name = derive_filename(pack_name, "idx", &index_name_buf); if (verify) { if (!index_name) @@ -1795,13 +1833,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (!verify) final(pack_name, curr_pack, index_name, curr_index, - keep_name, keep_msg, + keep_msg, promisor_msg, pack_sha1); else close(input_fd); free(objects); strbuf_release(&index_name_buf); - strbuf_release(&keep_name_buf); if (pack_name == NULL) free((void *) curr_pack); if (index_name == NULL) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 6e77dfd44439f4..b8c9956f497573 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -15,6 +15,8 @@ #include "diff.h" #include "revision.h" #include "list-objects.h" +#include "list-objects-filter.h" +#include "list-objects-filter-options.h" #include "pack-objects.h" #include "progress.h" #include "refs.h" @@ -73,12 +75,16 @@ static int use_bitmap_index = -1; static int write_bitmap_index; static uint16_t write_bitmap_options; +static int exclude_promisor_objects; + static unsigned long delta_cache_size = 0; static unsigned long max_delta_cache_size = 256 * 1024 * 1024; static unsigned long cache_max_small_delta_size = 1000; static unsigned long window_memory_limit = 0; +static struct list_objects_filter_options filter_options; + /* * stats */ @@ -2547,6 +2553,11 @@ static void show_commit(struct commit *commit, void *data) static void show_object(struct object *obj, const char *name, void *data) { + if (exclude_promisor_objects && + !has_object_file(&obj->oid) && + is_promisor_object(&obj->oid)) + return; + add_preferred_base_object(name); add_object_entry(obj->oid.hash, obj->type, name, 0); obj->flags |= OBJECT_ADDED; @@ -2816,7 +2827,10 @@ static void get_object_list(int ac, const char **av) if (prepare_revision_walk(&revs)) die("revision walk setup failed"); mark_edges_uninteresting(&revs, show_edge); - traverse_commit_list(&revs, show_commit, show_object, NULL); + + traverse_commit_list_filtered(&filter_options, &revs, + show_commit, show_object, NULL, + NULL); if (unpack_unreachable_expiration) { revs.ignore_missing_links = 1; @@ -2952,6 +2966,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) N_("use a bitmap index if available to speed up counting objects")), OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index, N_("write a bitmap index together with the pack index")), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), + OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects, + N_("do not pack objects in promisor packfiles")), OPT_END(), }; @@ -2997,6 +3014,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) argv_array_push(&rp, "--unpacked"); } + if (exclude_promisor_objects) { + use_internal_rev_list = 1; + fetch_if_missing = 0; + argv_array_push(&rp, "--exclude-promisor-objects"); + } + if (!reuse_object) reuse_delta = 0; if (pack_compression_level == -1) @@ -3028,6 +3051,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!rev_list_all || !rev_list_reflog || !rev_list_index) unpack_unreachable_expiration = 0; + if (filter_options.choice) { + if (!pack_to_stdout) + die("cannot use filtering with an indexable pack."); + use_bitmap_index = 0; + } + /* * "soft" reasons not to use bitmaps - for on-disk repack by default we want * diff --git a/builtin/prune.c b/builtin/prune.c index cddabf26a95cc2..be34645dcfa6c1 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -101,12 +101,15 @@ int cmd_prune(int argc, const char **argv, const char *prefix) { struct rev_info revs; struct progress *progress = NULL; + int exclude_promisor_objects = 0; const struct option options[] = { OPT__DRY_RUN(&show_only, N_("do not remove, show only")), OPT__VERBOSE(&verbose, N_("report pruned objects")), OPT_BOOL(0, "progress", &show_progress, N_("show progress")), OPT_EXPIRY_DATE(0, "expire", &expire, N_("expire objects older than