Skip to content

Commit 27f6460

Browse files
jeffhostetlerderrickstolee
authored andcommitted
Harden gvfs-helper to validate the packfiles in a multipart prefetch response (#571)
Teach `gvfs-helper` to ignore the optional `.idx` files that may be included in a `prefetch` response and always use `git index-pack` to create them from the `.pack` files received in the data stream. This is a little wasteful in terms of client-side compute and of the network bandwidth, but allows us to use the full packfile verification code contained within `git index-pack` to ensure that the received packfiles are valid.
2 parents 3361e8a + e3f2759 commit 27f6460

File tree

3 files changed

+176
-37
lines changed

3 files changed

+176
-37
lines changed

gvfs-helper.c

Lines changed: 32 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,11 @@
105105
// The GVFS Protocol defines this value as a way to
106106
// request cached packfiles NEWER THAN this timestamp.
107107
//
108+
// --max-retries=<n> // defaults to "6"
109+
//
110+
// Number of retries after transient network errors.
111+
// Set to zero to disable such retries.
112+
//
108113
// server
109114
//
110115
// Interactive/sub-process mode. Listen for a series of commands
@@ -2116,7 +2121,6 @@ static void extract_packfile_from_multipack(
21162121
{
21172122
struct ph ph;
21182123
struct tempfile *tempfile_pack = NULL;
2119-
struct tempfile *tempfile_idx = NULL;
21202124
int result = -1;
21212125
int b_no_idx_in_multipack;
21222126
struct object_id packfile_checksum;
@@ -2150,16 +2154,14 @@ static void extract_packfile_from_multipack(
21502154
b_no_idx_in_multipack = (ph.idx_len == maximum_unsigned_value_of_type(uint64_t) ||
21512155
ph.idx_len == 0);
21522156

2153-
if (b_no_idx_in_multipack) {
2154-
my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL);
2155-
if (!tempfile_pack)
2156-
goto done;
2157-
} else {
2158-
/* create a pair of tempfiles with the same basename */
2159-
my_create_tempfile(status, 0, "pack", &tempfile_pack, "idx", &tempfile_idx);
2160-
if (!tempfile_pack || !tempfile_idx)
2161-
goto done;
2162-
}
2157+
/*
2158+
* We are going to harden `gvfs-helper` here and ignore the .idx file
2159+
* if it is provided and always compute it locally so that we get the
2160+
* added verification that `git index-pack` provides.
2161+
*/
2162+
my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL);
2163+
if (!tempfile_pack)
2164+
goto done;
21632165

21642166
/*
21652167
* Copy the current packfile from the open stream and capture
@@ -2186,38 +2188,31 @@ static void extract_packfile_from_multipack(
21862188

21872189
oid_to_hex_r(hex_checksum, &packfile_checksum);
21882190

2189-
if (b_no_idx_in_multipack) {
2190-
/*
2191-
* The server did not send the corresponding .idx, so
2192-
* we have to compute it ourselves.
2193-
*/
2194-
strbuf_addbuf(&temp_path_idx, &temp_path_pack);
2195-
strbuf_strip_suffix(&temp_path_idx, ".pack");
2196-
strbuf_addstr(&temp_path_idx, ".idx");
2191+
/*
2192+
* Always compute the .idx file from the .pack file.
2193+
*/
2194+
strbuf_addbuf(&temp_path_idx, &temp_path_pack);
2195+
strbuf_strip_suffix(&temp_path_idx, ".pack");
2196+
strbuf_addstr(&temp_path_idx, ".idx");
21972197

2198-
my_run_index_pack(params, status,
2199-
&temp_path_pack, &temp_path_idx,
2200-
NULL);
2201-
if (status->ec != GH__ERROR_CODE__OK)
2202-
goto done;
2198+
my_run_index_pack(params, status,
2199+
&temp_path_pack, &temp_path_idx,
2200+
NULL);
2201+
if (status->ec != GH__ERROR_CODE__OK)
2202+
goto done;
22032203

2204-
} else {
2204+
if (!b_no_idx_in_multipack) {
22052205
/*
22062206
* Server sent the .idx immediately after the .pack in the
2207-
* data stream. I'm tempted to verify it, but that defeats
2208-
* the purpose of having it cached...
2207+
* data stream. Skip over it.
22092208
*/
2210-
if (my_copy_fd_len(fd_multipack, get_tempfile_fd(tempfile_idx),
2211-
ph.idx_len) < 0) {
2209+
if (lseek(fd_multipack, ph.idx_len, SEEK_CUR) < 0) {
22122210
strbuf_addf(&status->error_message,
2213-
"could not extract index[%d] in multipack",
2211+
"could not skip index[%d] in multipack",
22142212
k);
22152213
status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH;
22162214
goto done;
22172215
}
2218-
2219-
strbuf_addstr(&temp_path_idx, get_tempfile_path(tempfile_idx));
2220-
close_tempfile_gently(tempfile_idx);
22212216
}
22222217

22232218
strbuf_addf(&buf_timestamp, "%u", (unsigned int)ph.timestamp);
@@ -2232,7 +2227,6 @@ static void extract_packfile_from_multipack(
22322227

22332228
done:
22342229
delete_tempfile(&tempfile_pack);
2235-
delete_tempfile(&tempfile_idx);
22362230
strbuf_release(&temp_path_pack);
22372231
strbuf_release(&temp_path_idx);
22382232
strbuf_release(&final_path_pack);
@@ -3781,6 +3775,8 @@ static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv)
37813775
static const char *since_str;
37823776
static struct option prefetch_options[] = {
37833777
OPT_STRING(0, "since", &since_str, N_("since"), N_("seconds since epoch")),
3778+
OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries,
3779+
N_("retries for transient network errors")),
37843780
OPT_END(),
37853781
};
37863782

@@ -3800,6 +3796,8 @@ static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv)
38003796
if (my_parse_since(since_str, &seconds_since_epoch))
38013797
die("could not parse 'since' field");
38023798
}
3799+
if (gh__cmd_opts.max_retries < 0)
3800+
gh__cmd_opts.max_retries = 0;
38033801

38043802
finish_init(1);
38053803

t/helper/test-gvfs-protocol.c

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,82 @@ static int ct_pack_sort_compare(const void *_a, const void *_b)
11361136
return (a->ph.timestamp < b->ph.timestamp) ? -1 : (a->ph.timestamp != b->ph.timestamp);
11371137
}
11381138

1139+
#define MY_MIN(a, b) ((a) < (b) ? (a) : (b))
1140+
1141+
/*
1142+
* Like copy.c:copy_fd(), but corrupt part of the trailing SHA (if the
1143+
* given mayhem key is defined) as we copy it to the destination file.
1144+
*
1145+
* We don't know (or care) if the input file is a pack file or idx
1146+
* file, just that the final bytes are part of a SHA that we can
1147+
* corrupt.
1148+
*/
1149+
static int copy_fd_with_checksum_mayhem(int ifd, int ofd,
1150+
const char *mayhem_key,
1151+
ssize_t nr_wrong_bytes)
1152+
{
1153+
off_t in_cur, in_len;
1154+
ssize_t bytes_to_copy;
1155+
ssize_t bytes_remaining_to_copy;
1156+
char buffer[8192];
1157+
1158+
if (!mayhem_key || !*mayhem_key || !nr_wrong_bytes ||
1159+
!string_list_has_string(&mayhem_list, mayhem_key))
1160+
return copy_fd(ifd, ofd);
1161+
1162+
in_cur = lseek(ifd, 0, SEEK_CUR);
1163+
if (in_cur < 0)
1164+
return in_cur;
1165+
1166+
in_len = lseek(ifd, 0, SEEK_END);
1167+
if (in_len < 0)
1168+
return in_len;
1169+
1170+
if (lseek(ifd, in_cur, SEEK_SET) < 0)
1171+
return -1;
1172+
1173+
/* Copy the entire file except for the last few bytes. */
1174+
1175+
bytes_to_copy = (ssize_t)in_len - nr_wrong_bytes;
1176+
bytes_remaining_to_copy = bytes_to_copy;
1177+
while (bytes_remaining_to_copy) {
1178+
ssize_t to_read = MY_MIN(sizeof(buffer), bytes_remaining_to_copy);
1179+
ssize_t len = xread(ifd, buffer, to_read);
1180+
1181+
if (!len)
1182+
return -1; /* error on unexpected EOF */
1183+
if (len < 0)
1184+
return -1;
1185+
if (write_in_full(ofd, buffer, len) < 0)
1186+
return -1;
1187+
1188+
bytes_remaining_to_copy -= len;
1189+
}
1190+
1191+
/* Read the trailing bytes so that we can alter them before copying. */
1192+
1193+
while (nr_wrong_bytes) {
1194+
ssize_t to_read = MY_MIN(sizeof(buffer), nr_wrong_bytes);
1195+
ssize_t len = xread(ifd, buffer, to_read);
1196+
ssize_t k;
1197+
1198+
if (!len)
1199+
return -1; /* error on unexpected EOF */
1200+
if (len < 0)
1201+
return -1;
1202+
1203+
for (k = 0; k < len; k++)
1204+
buffer[k] ^= 0xff;
1205+
1206+
if (write_in_full(ofd, buffer, len) < 0)
1207+
return -1;
1208+
1209+
nr_wrong_bytes -= len;
1210+
}
1211+
1212+
return 0;
1213+
}
1214+
11391215
static enum worker_result send_ct_item(const struct ct_pack_item *item)
11401216
{
11411217
struct ph ph_le;
@@ -1157,7 +1233,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item)
11571233
trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf);
11581234

11591235
fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY);
1160-
if (fd_pack == -1 || copy_fd(fd_pack, 1)) {
1236+
if (fd_pack == -1 ||
1237+
copy_fd_with_checksum_mayhem(fd_pack, 1, "bad_prefetch_pack_sha", 4)) {
11611238
logerror("could not send packfile");
11621239
wr = WR_IO_ERROR;
11631240
goto done;
@@ -1167,7 +1244,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item)
11671244
trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf);
11681245

11691246
fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY);
1170-
if (fd_idx == -1 || copy_fd(fd_idx, 1)) {
1247+
if (fd_idx == -1 ||
1248+
copy_fd_with_checksum_mayhem(fd_idx, 1, "bad_prefetch_idx_sha", 4)) {
11711249
logerror("could not send idx");
11721250
wr = WR_IO_ERROR;
11731251
goto done;

t/t5799-gvfs-helper.sh

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1299,7 +1299,7 @@ test_expect_success 'duplicate and busy: vfs- packfile' '
12991299
# content matches the requested SHA.
13001300
#
13011301
test_expect_success 'catch corrupted loose object' '
1302-
# test_when_finished "per_test_cleanup" &&
1302+
test_when_finished "per_test_cleanup" &&
13031303
start_gvfs_protocol_server_with_mayhem corrupt_loose &&
13041304
13051305
test_must_fail \
@@ -1322,4 +1322,67 @@ test_expect_success 'catch corrupted loose object' '
13221322
git -C "$REPO_T1" fsck
13231323
'
13241324

1325+
#################################################################
1326+
# Ensure that we can detect when we receive a corrupted packfile
1327+
# from the server. This is not concerned with network IO errors,
1328+
# but rather cases when the cache or origin server generates or
1329+
# sends an invalid packfile.
1330+
#
1331+
# For example, if the server throws an exception and writes the
1332+
# stack trace to the socket rather than or in addition to the
1333+
# packfile content.
1334+
#
1335+
# Or for example, if the packfile on the server's disk is corrupt
1336+
# and it sends it correctly, but the original data was already
1337+
# garbage, so the client still has garbage (and retrying won't
1338+
# help).
1339+
#################################################################
1340+
1341+
# Send corrupt PACK files w/o IDX files (so that `gvfs-helper`
1342+
# must use `index-pack` to create it. (And as a side-effect,
1343+
# validate the PACK file is not corrupt.)
1344+
test_expect_success 'prefetch corrupt pack without idx' '
1345+
test_when_finished "per_test_cleanup" &&
1346+
start_gvfs_protocol_server_with_mayhem \
1347+
bad_prefetch_pack_sha \
1348+
no_prefetch_idx &&
1349+
1350+
test_must_fail \
1351+
git -C "$REPO_T1" gvfs-helper \
1352+
--cache-server=disable \
1353+
--remote=origin \
1354+
--no-progress \
1355+
prefetch \
1356+
--max-retries=0 \
1357+
--since="1000000000" \
1358+
>OUT.output 2>OUT.stderr &&
1359+
1360+
stop_gvfs_protocol_server &&
1361+
1362+
# Verify corruption detected in pack when building
1363+
# local idx file for it.
1364+
1365+
grep -q "error: .* index-pack failed" <OUT.stderr
1366+
'
1367+
1368+
# Send corrupt PACK files with IDX files. Since the cache server
1369+
# sends both, `gvfs-helper` might fail to verify both of them.
1370+
test_expect_success 'prefetch corrupt pack with corrupt idx' '
1371+
test_when_finished "per_test_cleanup" &&
1372+
start_gvfs_protocol_server_with_mayhem \
1373+
bad_prefetch_pack_sha &&
1374+
1375+
test_must_fail \
1376+
git -C "$REPO_T1" gvfs-helper \
1377+
--cache-server=disable \
1378+
--remote=origin \
1379+
--no-progress \
1380+
prefetch \
1381+
--max-retries=0 \
1382+
--since="1000000000" \
1383+
>OUT.output 2>OUT.stderr &&
1384+
1385+
stop_gvfs_protocol_server
1386+
'
1387+
13251388
test_done

0 commit comments

Comments
 (0)