Skip to content

Commit b282ae7

Browse files
jeffhostetlerdscho
authored andcommitted
t5799: add tests to detect corrupt pack/idx files in prefetch
Add "mayhem" keys to generate corrupt packfiles and/or corrupt idx files in prefetch by trashing the trailing checksum SHA. Add unit tests to t5799 to verify that `gvfs-helper` detects these corrupt pack/idx files. Currently, only the (bad-pack, no-idx) case is correctly detected, Because `gvfs-helper` needs to locally compute the idx file itself. A test for the (bad-pack, any-idx) case was also added (as a known breakage) because `gvfs-helper` assumes that when the cache server provides both, it doesn't need to verify them. We will fix that assumption in the next commit. Signed-off-by: Jeff Hostetler <[email protected]>
1 parent 7f9bc3c commit b282ae7

File tree

2 files changed

+147
-3
lines changed

2 files changed

+147
-3
lines changed

t/helper/test-gvfs-protocol.c

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1154,6 +1154,82 @@ static int ct_pack_sort_compare(const void *_a, const void *_b)
11541154
return (a->ph.timestamp < b->ph.timestamp) ? -1 : (a->ph.timestamp != b->ph.timestamp);
11551155
}
11561156

1157+
#define MY_MIN(a, b) ((a) < (b) ? (a) : (b))
1158+
1159+
/*
1160+
* Like copy.c:copy_fd(), but corrupt part of the trailing SHA (if the
1161+
* given mayhem key is defined) as we copy it to the destination file.
1162+
*
1163+
* We don't know (or care) if the input file is a pack file or idx
1164+
* file, just that the final bytes are part of a SHA that we can
1165+
* corrupt.
1166+
*/
1167+
static int copy_fd_with_checksum_mayhem(int ifd, int ofd,
1168+
const char *mayhem_key,
1169+
ssize_t nr_wrong_bytes)
1170+
{
1171+
off_t in_cur, in_len;
1172+
ssize_t bytes_to_copy;
1173+
ssize_t bytes_remaining_to_copy;
1174+
char buffer[8192];
1175+
1176+
if (!mayhem_key || !*mayhem_key || !nr_wrong_bytes ||
1177+
!string_list_has_string(&mayhem_list, mayhem_key))
1178+
return copy_fd(ifd, ofd);
1179+
1180+
in_cur = lseek(ifd, 0, SEEK_CUR);
1181+
if (in_cur < 0)
1182+
return in_cur;
1183+
1184+
in_len = lseek(ifd, 0, SEEK_END);
1185+
if (in_len < 0)
1186+
return in_len;
1187+
1188+
if (lseek(ifd, in_cur, SEEK_SET) < 0)
1189+
return -1;
1190+
1191+
/* Copy the entire file except for the last few bytes. */
1192+
1193+
bytes_to_copy = (ssize_t)in_len - nr_wrong_bytes;
1194+
bytes_remaining_to_copy = bytes_to_copy;
1195+
while (bytes_remaining_to_copy) {
1196+
ssize_t to_read = MY_MIN((ssize_t)sizeof(buffer), bytes_remaining_to_copy);
1197+
ssize_t len = xread(ifd, buffer, to_read);
1198+
1199+
if (!len)
1200+
return -1; /* error on unexpected EOF */
1201+
if (len < 0)
1202+
return -1;
1203+
if (write_in_full(ofd, buffer, len) < 0)
1204+
return -1;
1205+
1206+
bytes_remaining_to_copy -= len;
1207+
}
1208+
1209+
/* Read the trailing bytes so that we can alter them before copying. */
1210+
1211+
while (nr_wrong_bytes) {
1212+
ssize_t to_read = MY_MIN((ssize_t)sizeof(buffer), nr_wrong_bytes);
1213+
ssize_t len = xread(ifd, buffer, to_read);
1214+
ssize_t k;
1215+
1216+
if (!len)
1217+
return -1; /* error on unexpected EOF */
1218+
if (len < 0)
1219+
return -1;
1220+
1221+
for (k = 0; k < len; k++)
1222+
buffer[k] ^= 0xff;
1223+
1224+
if (write_in_full(ofd, buffer, len) < 0)
1225+
return -1;
1226+
1227+
nr_wrong_bytes -= len;
1228+
}
1229+
1230+
return 0;
1231+
}
1232+
11571233
static enum worker_result send_ct_item(const struct ct_pack_item *item)
11581234
{
11591235
struct ph ph_le;
@@ -1175,7 +1251,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item)
11751251
trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf);
11761252

11771253
fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY);
1178-
if (fd_pack == -1 || copy_fd(fd_pack, 1)) {
1254+
if (fd_pack == -1 ||
1255+
copy_fd_with_checksum_mayhem(fd_pack, 1, "bad_prefetch_pack_sha", 4)) {
11791256
logerror("could not send packfile");
11801257
wr = WR_IO_ERROR;
11811258
goto done;
@@ -1185,7 +1262,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item)
11851262
trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf);
11861263

11871264
fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY);
1188-
if (fd_idx == -1 || copy_fd(fd_idx, 1)) {
1265+
if (fd_idx == -1 ||
1266+
copy_fd_with_checksum_mayhem(fd_idx, 1, "bad_prefetch_idx_sha", 4)) {
11891267
logerror("could not send idx");
11901268
wr = WR_IO_ERROR;
11911269
goto done;

t/t5799-gvfs-helper.sh

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1299,7 +1299,7 @@ test_expect_success 'duplicate and busy: vfs- packfile' '
12991299
# content matches the requested SHA.
13001300
#
13011301
test_expect_success 'catch corrupted loose object' '
1302-
# test_when_finished "per_test_cleanup" &&
1302+
test_when_finished "per_test_cleanup" &&
13031303
start_gvfs_protocol_server_with_mayhem corrupt_loose &&
13041304
13051305
test_must_fail \
@@ -1322,4 +1322,70 @@ test_expect_success 'catch corrupted loose object' '
13221322
git -C "$REPO_T1" fsck
13231323
'
13241324

1325+
#################################################################
1326+
# Ensure that we can detect when we receive a corrupted packfile
1327+
# from the server. This is not concerned with network IO errors,
1328+
# but rather cases when the cache or origin server generates or
1329+
# sends an invalid packfile.
1330+
#
1331+
# For example, if the server throws an exception and writes the
1332+
# stack trace to the socket rather than or in addition to the
1333+
# packfile content.
1334+
#
1335+
# Or for example, if the packfile on the server's disk is corrupt
1336+
# and it sends it correctly, but the original data was already
1337+
# garbage, so the client still has garbage (and retrying won't
1338+
# help).
1339+
#################################################################
1340+
1341+
# Send corrupt PACK files w/o IDX files (so that `gvfs-helper`
1342+
# must use `index-pack` to create it. (And as a side-effect,
1343+
# validate the PACK file is not corrupt.)
1344+
test_expect_success 'prefetch corrupt pack without idx' '
1345+
test_when_finished "per_test_cleanup" &&
1346+
start_gvfs_protocol_server_with_mayhem \
1347+
bad_prefetch_pack_sha \
1348+
no_prefetch_idx &&
1349+
1350+
test_must_fail \
1351+
git -C "$REPO_T1" gvfs-helper \
1352+
--cache-server=disable \
1353+
--remote=origin \
1354+
--no-progress \
1355+
prefetch \
1356+
--max-retries=0 \
1357+
--since="1000000000" \
1358+
>OUT.output 2>OUT.stderr &&
1359+
1360+
stop_gvfs_protocol_server &&
1361+
1362+
# Verify corruption detected in pack when building
1363+
# local idx file for it.
1364+
1365+
grep -q "error: .* index-pack failed" <OUT.stderr
1366+
'
1367+
1368+
# Send corrupt PACK files with IDX files. Since the cache server
1369+
# sends both, `gvfs-helper` might fail to verify both of them.
1370+
test_expect_failure 'prefetch corrupt pack with corrupt idx' '
1371+
test_when_finished "per_test_cleanup" &&
1372+
start_gvfs_protocol_server_with_mayhem \
1373+
bad_prefetch_pack_sha &&
1374+
1375+
# TODO This is a false-positive since `gvfs-helper`
1376+
# TODO does not verify either of them when a pair
1377+
# TODO is sent.
1378+
test_must_fail \
1379+
git -C "$REPO_T1" gvfs-helper \
1380+
--cache-server=disable \
1381+
--remote=origin \
1382+
--no-progress \
1383+
prefetch \
1384+
--max-retries=0 \
1385+
--since="1000000000" \
1386+
>OUT.output 2>OUT.stderr &&
1387+
1388+
stop_gvfs_protocol_server
1389+
'
1390+
13251391
test_done

0 commit comments

Comments
 (0)