Skip to content

Harden gvfs-helper to validate the packfiles in a multipart prefetch response #571

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 32 additions & 34 deletions gvfs-helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@
// The GVFS Protocol defines this value as a way to
// request cached packfiles NEWER THAN this timestamp.
//
// --max-retries=<n> // defaults to "6"
//
// Number of retries after transient network errors.
// Set to zero to disable such retries.
//
// server
//
// Interactive/sub-process mode. Listen for a series of commands
Expand Down Expand Up @@ -2116,7 +2121,6 @@ static void extract_packfile_from_multipack(
{
struct ph ph;
struct tempfile *tempfile_pack = NULL;
struct tempfile *tempfile_idx = NULL;
int result = -1;
int b_no_idx_in_multipack;
struct object_id packfile_checksum;
Expand Down Expand Up @@ -2150,16 +2154,14 @@ static void extract_packfile_from_multipack(
b_no_idx_in_multipack = (ph.idx_len == maximum_unsigned_value_of_type(uint64_t) ||
ph.idx_len == 0);

if (b_no_idx_in_multipack) {
my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL);
if (!tempfile_pack)
goto done;
} else {
/* create a pair of tempfiles with the same basename */
my_create_tempfile(status, 0, "pack", &tempfile_pack, "idx", &tempfile_idx);
if (!tempfile_pack || !tempfile_idx)
goto done;
}
/*
* We are going to harden `gvfs-helper` here and ignore the .idx file
* if it is provided and always compute it locally so that we get the
* added verification that `git index-pack` provides.
*/
my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL);
if (!tempfile_pack)
goto done;

/*
* Copy the current packfile from the open stream and capture
Expand All @@ -2186,38 +2188,31 @@ static void extract_packfile_from_multipack(

oid_to_hex_r(hex_checksum, &packfile_checksum);

if (b_no_idx_in_multipack) {
/*
* The server did not send the corresponding .idx, so
* we have to compute it ourselves.
*/
strbuf_addbuf(&temp_path_idx, &temp_path_pack);
strbuf_strip_suffix(&temp_path_idx, ".pack");
strbuf_addstr(&temp_path_idx, ".idx");
/*
* Always compute the .idx file from the .pack file.
*/
strbuf_addbuf(&temp_path_idx, &temp_path_pack);
strbuf_strip_suffix(&temp_path_idx, ".pack");
strbuf_addstr(&temp_path_idx, ".idx");

my_run_index_pack(params, status,
&temp_path_pack, &temp_path_idx,
NULL);
if (status->ec != GH__ERROR_CODE__OK)
goto done;
my_run_index_pack(params, status,
&temp_path_pack, &temp_path_idx,
NULL);
if (status->ec != GH__ERROR_CODE__OK)
goto done;

} else {
if (!b_no_idx_in_multipack) {
/*
* Server sent the .idx immediately after the .pack in the
* data stream. I'm tempted to verify it, but that defeats
* the purpose of having it cached...
* data stream. Skip over it.
*/
if (my_copy_fd_len(fd_multipack, get_tempfile_fd(tempfile_idx),
ph.idx_len) < 0) {
if (lseek(fd_multipack, ph.idx_len, SEEK_CUR) < 0) {
strbuf_addf(&status->error_message,
"could not extract index[%d] in multipack",
"could not skip index[%d] in multipack",
k);
status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH;
goto done;
}

strbuf_addstr(&temp_path_idx, get_tempfile_path(tempfile_idx));
close_tempfile_gently(tempfile_idx);
}

strbuf_addf(&buf_timestamp, "%u", (unsigned int)ph.timestamp);
Expand All @@ -2232,7 +2227,6 @@ static void extract_packfile_from_multipack(

done:
delete_tempfile(&tempfile_pack);
delete_tempfile(&tempfile_idx);
strbuf_release(&temp_path_pack);
strbuf_release(&temp_path_idx);
strbuf_release(&final_path_pack);
Expand Down Expand Up @@ -3781,6 +3775,8 @@ static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv)
static const char *since_str;
static struct option prefetch_options[] = {
OPT_STRING(0, "since", &since_str, N_("since"), N_("seconds since epoch")),
OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries,
N_("retries for transient network errors")),
OPT_END(),
};

Expand All @@ -3800,6 +3796,8 @@ static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv)
if (my_parse_since(since_str, &seconds_since_epoch))
die("could not parse 'since' field");
}
if (gh__cmd_opts.max_retries < 0)
gh__cmd_opts.max_retries = 0;

finish_init(1);

Expand Down
82 changes: 80 additions & 2 deletions t/helper/test-gvfs-protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -1136,6 +1136,82 @@ static int ct_pack_sort_compare(const void *_a, const void *_b)
return (a->ph.timestamp < b->ph.timestamp) ? -1 : (a->ph.timestamp != b->ph.timestamp);
}

#define MY_MIN(a, b) ((a) < (b) ? (a) : (b))

/*
* Like copy.c:copy_fd(), but corrupt part of the trailing SHA (if the
* given mayhem key is defined) as we copy it to the destination file.
*
* We don't know (or care) if the input file is a pack file or idx
* file, just that the final bytes are part of a SHA that we can
* corrupt.
*/
static int copy_fd_with_checksum_mayhem(int ifd, int ofd,
const char *mayhem_key,
ssize_t nr_wrong_bytes)
{
off_t in_cur, in_len;
ssize_t bytes_to_copy;
ssize_t bytes_remaining_to_copy;
char buffer[8192];

if (!mayhem_key || !*mayhem_key || !nr_wrong_bytes ||
!string_list_has_string(&mayhem_list, mayhem_key))
return copy_fd(ifd, ofd);

in_cur = lseek(ifd, 0, SEEK_CUR);
if (in_cur < 0)
return in_cur;

in_len = lseek(ifd, 0, SEEK_END);
if (in_len < 0)
return in_len;

if (lseek(ifd, in_cur, SEEK_SET) < 0)
return -1;

/* Copy the entire file except for the last few bytes. */

bytes_to_copy = (ssize_t)in_len - nr_wrong_bytes;
bytes_remaining_to_copy = bytes_to_copy;
while (bytes_remaining_to_copy) {
ssize_t to_read = MY_MIN(sizeof(buffer), bytes_remaining_to_copy);
ssize_t len = xread(ifd, buffer, to_read);

if (!len)
return -1; /* error on unexpected EOF */
if (len < 0)
return -1;
if (write_in_full(ofd, buffer, len) < 0)
return -1;

bytes_remaining_to_copy -= len;
}

/* Read the trailing bytes so that we can alter them before copying. */

while (nr_wrong_bytes) {
ssize_t to_read = MY_MIN(sizeof(buffer), nr_wrong_bytes);
ssize_t len = xread(ifd, buffer, to_read);
ssize_t k;

if (!len)
return -1; /* error on unexpected EOF */
if (len < 0)
return -1;

for (k = 0; k < len; k++)
buffer[k] ^= 0xff;

if (write_in_full(ofd, buffer, len) < 0)
return -1;

nr_wrong_bytes -= len;
}

return 0;
}

static enum worker_result send_ct_item(const struct ct_pack_item *item)
{
struct ph ph_le;
Expand All @@ -1157,7 +1233,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item)
trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf);

fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY);
if (fd_pack == -1 || copy_fd(fd_pack, 1)) {
if (fd_pack == -1 ||
copy_fd_with_checksum_mayhem(fd_pack, 1, "bad_prefetch_pack_sha", 4)) {
logerror("could not send packfile");
wr = WR_IO_ERROR;
goto done;
Expand All @@ -1167,7 +1244,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item)
trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf);

fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY);
if (fd_idx == -1 || copy_fd(fd_idx, 1)) {
if (fd_idx == -1 ||
copy_fd_with_checksum_mayhem(fd_idx, 1, "bad_prefetch_idx_sha", 4)) {
logerror("could not send idx");
wr = WR_IO_ERROR;
goto done;
Expand Down
65 changes: 64 additions & 1 deletion t/t5799-gvfs-helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1299,7 +1299,7 @@ test_expect_success 'duplicate and busy: vfs- packfile' '
# content matches the requested SHA.
#
test_expect_success 'catch corrupted loose object' '
# test_when_finished "per_test_cleanup" &&
test_when_finished "per_test_cleanup" &&
start_gvfs_protocol_server_with_mayhem corrupt_loose &&

test_must_fail \
Expand All @@ -1322,4 +1322,67 @@ test_expect_success 'catch corrupted loose object' '
git -C "$REPO_T1" fsck
'

#################################################################
# Ensure that we can detect when we receive a corrupted packfile
# from the server. This is not concerned with network IO errors,
# but rather cases when the cache or origin server generates or
# sends an invalid packfile.
#
# For example, if the server throws an exception and writes the
# stack trace to the socket rather than or in addition to the
# packfile content.
#
# Or for example, if the packfile on the server's disk is corrupt
# and it sends it correctly, but the original data was already
# garbage, so the client still has garbage (and retrying won't
# help).
#################################################################

# Send corrupt PACK files w/o IDX files (so that `gvfs-helper`
# must use `index-pack` to create it. (And as a side-effect,
# validate the PACK file is not corrupt.)
test_expect_success 'prefetch corrupt pack without idx' '
test_when_finished "per_test_cleanup" &&
start_gvfs_protocol_server_with_mayhem \
bad_prefetch_pack_sha \
no_prefetch_idx &&

test_must_fail \
git -C "$REPO_T1" gvfs-helper \
--cache-server=disable \
--remote=origin \
--no-progress \
prefetch \
--max-retries=0 \
--since="1000000000" \
>OUT.output 2>OUT.stderr &&

stop_gvfs_protocol_server &&

# Verify corruption detected in pack when building
# local idx file for it.

grep -q "error: .* index-pack failed" <OUT.stderr
'

# Send corrupt PACK files with IDX files. Since the cache server
# sends both, `gvfs-helper` might fail to verify both of them.
test_expect_success 'prefetch corrupt pack with corrupt idx' '
test_when_finished "per_test_cleanup" &&
start_gvfs_protocol_server_with_mayhem \
bad_prefetch_pack_sha &&

test_must_fail \
git -C "$REPO_T1" gvfs-helper \
--cache-server=disable \
--remote=origin \
--no-progress \
prefetch \
--max-retries=0 \
--since="1000000000" \
>OUT.output 2>OUT.stderr &&

stop_gvfs_protocol_server
'

test_done