Skip to content

Commit 881d8cf

Browse files
committed
maintenance: add new vfs-cache-move maintenance task
Introduce a new maintenance task, `vfs-cache-move`, that operates on Scalar or VFS for Git repositories with a per-volume, shared object cache (specified by `gvfs.sharedCache`) to migrate packfiles from the repository object directory to the shared cache. Older versions of `microsoft/git` incorrectly placed packfiles in the repository object directory instead of the shared cache; this task will help clean up existing clones impacted by that issue. Signed-off-by: Matthew John Cheetham <[email protected]>
1 parent d9f837f commit 881d8cf

File tree

3 files changed

+277
-0
lines changed

3 files changed

+277
-0
lines changed

Documentation/git-maintenance.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ task:
6969
* `prefetch`: hourly.
7070
* `loose-objects`: daily.
7171
* `incremental-repack`: daily.
72+
* `vfs-cache-move`: weekly.
7273
--
7374
+
7475
`git maintenance register` will also disable foreground maintenance by
@@ -158,6 +159,13 @@ pack-refs::
158159
need to iterate across many references. See linkgit:git-pack-refs[1]
159160
for more information.
160161

162+
vfs-cache-move::
163+
The `vfs-cache-move` task only operates on Scalar or VFS for Git
164+
repositories (cloned with either `scalar clone` or `gvfs clone`) that
165+
have the `gvfs.sharedCache` configuration setting present. This task
166+
migrates pack files from the repository's object directory into the
167+
shared volume cache.
168+
161169
OPTIONS
162170
-------
163171
--auto::

builtin/gc.c

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* Copyright (c) 2006 Shawn O. Pearce
1111
*/
1212
#define USE_THE_REPOSITORY_VARIABLE
13+
#include "git-compat-util.h"
1314
#include "builtin.h"
1415
#include "abspath.h"
1516
#include "date.h"
@@ -41,9 +42,19 @@
4142
#include "hook.h"
4243
#include "setup.h"
4344
#include "trace2.h"
45+
#include "copy.h"
46+
#include "dir.h"
4447

4548
#define FAILED_RUN "failed to run %s"
4649

50+
/*
 * Debugging aid: expands to a block that loops for as long as stat() on
 * "D:/tmp/debug" succeeds, printing the process id to stderr and sleeping
 * five seconds per iteration.
 *
 * NOTE(review): this looks like a developer-only "wait for debugger attach"
 * hook with a hard-coded, machine-specific path - confirm whether it is
 * meant to ship.
 */
#define DEBUG_WAIT { \
51+
struct stat st; \
52+
while (!stat("D:/tmp/debug", &st)) { \
53+
fprintf(stderr, "[%d] Waiting to attach...\n", getpid()); \
54+
sleep(5); \
55+
} \
56+
}
57+
4758
static const char * const builtin_gc_usage[] = {
4859
N_("git gc [<options>]"),
4960
NULL
@@ -1347,6 +1358,132 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
13471358
return 0;
13481359
}
13491360

1361+
/*
 * Make 'dst' refer to the same content as 'src'.
 *
 * A hard link is attempted first. If that fails a warning is emitted and a
 * regular file copy (mode 0644) is attempted instead; if the copy fails as
 * well, the process dies.
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst)) {
		warning_errno(_("failed to link '%s' to '%s'"), src, dst);

		/* Note: copy_file() takes the destination first. */
		if (copy_file(dst, src, 0644))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);
	}
}
1373+
1374+
static void migrate_pack(const char *srcdir, const char *dstdir,
1375+
const char *pack_filename)
1376+
{
1377+
struct stat st;
1378+
int has_keep, has_rev, has_idx;
1379+
char *basename, *pack_src, *keep_src, *rev_src, *idx_src,
1380+
*pack_dst, *keep_dst, *rev_dst, *idx_dst;
1381+
1382+
trace2_region_enter("maintenance", "migrate_pack", the_repository);
1383+
1384+
basename = xstrndup(pack_filename, strlen(pack_filename) - 5 /*.pack*/);
1385+
pack_src = xstrfmt("%s/%s", srcdir, pack_filename);
1386+
pack_dst = xstrfmt("%s/%s", dstdir, pack_filename);
1387+
keep_src = xstrfmt("%s/%s.keep", srcdir, basename);
1388+
keep_dst = xstrfmt("%s/%s.keep", dstdir, basename);
1389+
rev_src = xstrfmt("%s/%s.rev", srcdir, basename);
1390+
rev_dst = xstrfmt("%s/%s.rev", dstdir, basename);
1391+
idx_src = xstrfmt("%s/%s.idx", srcdir, basename);
1392+
idx_dst = xstrfmt("%s/%s.idx", dstdir, basename);
1393+
1394+
has_keep = !stat(keep_src, &st);
1395+
has_rev = !stat(rev_src, &st);
1396+
has_idx = !stat(idx_src, &st);
1397+
1398+
/* A pack without an index file is not yet ready to be migrated. */
1399+
if (!has_idx)
1400+
goto cleanup;
1401+
1402+
/*
1403+
* Hard link (or copy if that fails) all but the index file so that
1404+
* other Git processes don't attempt to use the pack file from the new
1405+
* location yet.
1406+
*/
1407+
link_or_copy_or_die(pack_src, pack_dst);
1408+
if (has_keep)
1409+
link_or_copy_or_die(keep_src, keep_dst);
1410+
if (has_rev)
1411+
link_or_copy_or_die(rev_src, rev_dst);
1412+
1413+
/*
1414+
* Move the index file atomically now that the other files can be found
1415+
* at the destination.
1416+
*/
1417+
if (rename(idx_src, idx_dst))
1418+
die_errno(_("failed to move '%s' to '%s'"), idx_src, idx_dst);
1419+
1420+
/*
1421+
* Now the pack and all associated files exist at the destination we can
1422+
* now clean up the files in the source directory.
1423+
*/
1424+
if (unlink(pack_src))
1425+
warning_errno(_("failed to delete '%s'"), pack_src);
1426+
if (has_keep && unlink(keep_src))
1427+
warning_errno(_("failed to delete '%s'"), keep_src);
1428+
if (has_rev & unlink(rev_src))
1429+
warning_errno(_("failed to delete '%s'"), rev_src);
1430+
1431+
cleanup:
1432+
free(idx_src);
1433+
free(idx_dst);
1434+
free(rev_src);
1435+
free(rev_dst);
1436+
free(keep_src);
1437+
free(keep_dst);
1438+
free(pack_src);
1439+
free(pack_dst);
1440+
free(basename);
1441+
1442+
trace2_region_leave("maintenance", "migrate_pack", the_repository);
1443+
}
1444+
1445+
/*
 * Per-file callback for for_each_file_in_pack_dir(): migrate one pack into
 * the "pack" subdirectory of 'vfs_object_dir'.
 *
 * 'full_path' (of length 'full_path_len') is the path of the file being
 * visited and 'file_name' is its final component; 'data' is unused.
 *
 * Files that are not "*.pack" are ignored, as is everything when no shared
 * cache is configured or when the file's directory already equals
 * 'vfs_object_dir'.
 */
static void move_pack_to_vfs_cache(const char *full_path, size_t full_path_len,
				   const char *file_name, UNUSED void *data)
{
	char *srcdir;
	struct strbuf dstdir = STRBUF_INIT;

	/*
	 * We only care about the actual pack files here.
	 * The associated .idx, .keep, .rev files will be copied in tandem
	 * with the pack file, with the index file being moved last.
	 * The original locations of the non-index files will only be deleted
	 * once all other files have been copied/moved.
	 */
	if (!ends_with(file_name, ".pack"))
		return;

	/* No cache means there's no work to do (and nothing to allocate). */
	if (!vfs_object_dir)
		return;

	/* Directory part of full_path: strip "/<file_name>" from the end. */
	srcdir = xstrndup(full_path, full_path_len - strlen(file_name) - 1);

	/*
	 * Same source + destination means there's no work to do.
	 *
	 * NOTE(review): this compares the source pack directory against
	 * vfs_object_dir itself rather than against "<vfs_object_dir>/pack",
	 * which is where the files are actually written - confirm that this
	 * is the intended comparison.
	 */
	if (fspathcmp(srcdir, vfs_object_dir)) {
		strbuf_addf(&dstdir, "%s/pack", vfs_object_dir);
		migrate_pack(srcdir, dstdir.buf, file_name);
	}

	/* Released on every path that allocated srcdir. */
	free(srcdir);
	strbuf_release(&dstdir);
}
1473+
1474+
/*
 * The `vfs-cache-move` maintenance task: visit every file in the pack
 * directory of the repository's object store and hand it to
 * move_pack_to_vfs_cache().
 *
 * Both parameters are unused. Always returns 0; problems encountered while
 * migrating are reported by the helpers themselves.
 */
static int maintenance_task_vfs_cache_move(UNUSED struct maintenance_run_opts *opts,
					   UNUSED struct gc_config *cfg)
{
	struct repository *r = the_repository;

	for_each_file_in_pack_dir(r->objects->odb->path, move_pack_to_vfs_cache,
				  NULL);

	return 0;
}
1486+
13501487
typedef int maintenance_task_fn(struct maintenance_run_opts *opts,
13511488
struct gc_config *cfg);
13521489

@@ -1376,6 +1513,7 @@ enum maintenance_task_label {
13761513
TASK_GC,
13771514
TASK_COMMIT_GRAPH,
13781515
TASK_PACK_REFS,
1516+
TASK_VFS_CACHE_MOVE,
13791517

13801518
/* Leave as final value */
13811519
TASK__COUNT
@@ -1412,6 +1550,10 @@ static struct maintenance_task tasks[] = {
14121550
maintenance_task_pack_refs,
14131551
pack_refs_condition,
14141552
},
1553+
[TASK_VFS_CACHE_MOVE] = {
1554+
"vfs-cache-move",
1555+
maintenance_task_vfs_cache_move,
1556+
},
14151557
};
14161558

14171559
static int compare_tasks_by_selection(const void *a_, const void *b_)
@@ -1506,6 +1648,8 @@ static void initialize_maintenance_strategy(void)
15061648
tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY;
15071649
tasks[TASK_PACK_REFS].enabled = 1;
15081650
tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY;
1651+
tasks[TASK_VFS_CACHE_MOVE].enabled = 1;
1652+
tasks[TASK_VFS_CACHE_MOVE].schedule = SCHEDULE_WEEKLY;
15091653
}
15101654
}
15111655

t/t7900-maintenance.sh

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,4 +1012,129 @@ test_expect_success 'repacking loose objects is quiet' '
10121012
)
10131013
'
10141014

1015+
# With no gvfs.sharedCache configured, running maintenance must leave every
# pack file and its companion files where they were.
test_expect_success 'vfs-cache-move task with no shared cache skips' '
	test_when_finished "rm -rf repo" &&
	git init repo &&
	(
		cd repo &&
		test_commit something &&

		git config set maintenance.gc.enabled false &&
		git config set maintenance.vfs-cache-move.enabled true &&
		git config set maintenance.vfs-cache-move.auto 1 &&

		printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 |
		git -c fastimport.unpackLimit=0 fast-import &&

		find .git/objects/pack -type f \
			\( -name "*.pack" -o -name "*.idx" \
			-o -name "*.keep" -o -name "*.rev" \) >files.txt &&

		git maintenance run &&

		while IFS= read -r pack_file
		do
			test_path_exists $pack_file || exit 1
		done <files.txt
	)
'
1041+
1042+
# With gvfs.sharedCache set to the repository pack directory itself, running
# maintenance must leave every pack file and its companion files in place.
test_expect_success 'vfs-cache-move task cache path same as pack dir skips' '
	test_when_finished "rm -rf repo" &&
	git init repo &&
	(
		cd repo &&
		test_commit something &&

		git config set gvfs.sharedcache .git/objects/pack &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.vfs-cache-move.enabled true &&
		git config set maintenance.vfs-cache-move.auto 1 &&

		printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 |
		git -c fastimport.unpackLimit=0 fast-import &&

		find .git/objects/pack -type f \
			\( -name "*.pack" -o -name "*.idx" \
			-o -name "*.keep" -o -name "*.rev" \) >files.txt &&

		git maintenance run &&

		while IFS= read -r pack_file
		do
			test_path_exists $pack_file || exit 1
		done <files.txt
	)
'
1069+
1070+
# A pack that has only an index (no .rev or .keep companions) must still be
# migrated: afterwards the .pack/.idx files are gone from the repository
# pack directory and present under cache/pack.
#
# The cleanup registered with test_when_finished runs from the trash
# directory, not from inside the subshell that does "cd repo", so the cache
# is addressed there as "cache" (matching the mkdir below), whereas commands
# inside the subshell address it as "../cache".
test_expect_success 'vfs-cache-move task no .rev or .keep' '
	test_when_finished "rm -rf repo cache" &&
	mkdir -p cache/pack &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set gvfs.sharedcache ../cache &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.vfs-cache-move.enabled true &&
		git config set maintenance.vfs-cache-move.auto 1 &&

		printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 | \
			git -c fastimport.unpackLimit=0 fast-import &&
		find .git/objects/pack \
			-type f \
			\( -name "*.pack" \
			-o -name "*.idx" \) >src.txt &&

		rm -f .git/objects/pack/*.rev .git/objects/pack/*.keep &&

		sed "s|.*/|../cache/pack/|" src.txt >dst.txt &&

		git maintenance run &&
		while IFS= read -r f; do
			test_path_is_missing $f || exit 1
		done <src.txt &&

		while IFS= read -r f; do
			test_path_exists $f || exit 1
		done <dst.txt
	)
'
1104+
1105+
# Full migration: every .pack/.idx/.keep/.rev file found in the repository
# pack directory must be gone from there after maintenance and present under
# cache/pack.
#
# The cleanup registered with test_when_finished runs from the trash
# directory, not from inside the subshell that does "cd repo", so the cache
# is addressed there as "cache" (matching the mkdir below), whereas commands
# inside the subshell address it as "../cache".
test_expect_success 'vfs-cache-move task success' '
	test_when_finished "rm -rf repo cache" &&
	mkdir -p cache/pack &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set gvfs.sharedcache ../cache &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.vfs-cache-move.enabled true &&
		git config set maintenance.vfs-cache-move.auto 1 &&

		printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 | \
			git -c fastimport.unpackLimit=0 fast-import &&
		find .git/objects/pack \
			-type f \
			\( -name "*.pack" \
			-o -name "*.idx" \
			-o -name "*.keep" \
			-o -name "*.rev" \) >src.txt &&

		sed "s|.*/|../cache/pack/|" src.txt >dst.txt &&

		git maintenance run &&
		while IFS= read -r f; do
			test_path_is_missing $f || exit 1
		done <src.txt &&

		while IFS= read -r f; do
			test_path_exists $f || exit 1
		done <dst.txt
	)
'
1139+
10151140
test_done

0 commit comments

Comments
 (0)