Skip to content

Commit adc9200

Browse files
mjcheethamdscho
authored andcommitted
maintenance: add cache-local-objects maintenance task
Introduce a new maintenance task, `cache-local-objects`, that operates on Scalar or VFS for Git repositories with a per-volume, shared object cache (specified by `gvfs.sharedCache`) to migrate packfiles and loose objects from the repository object directory to the shared cache. Older versions of `microsoft/git` incorrectly placed packfiles in the repository object directory instead of the shared cache; this task will help clean up existing clones impacted by that issue. Migration of packfiles involves the following steps for each pack: 1. Hardlink (or copy): a. the .pack file b. the .keep file c. the .rev file 2. Move (or copy + delete) the .idx file 3. Delete/unlink: a. the .pack file b. the .keep file c. the .rev file Moving the index file after the others ensures the pack is not read from the new cache directory until all associated files (rev, keep) exist in the cache directory also. Moving loose objects operates as a move, or copy + delete. Signed-off-by: Matthew John Cheetham <[email protected]>
1 parent 4afc5ec commit adc9200

File tree

3 files changed

+327
-0
lines changed

3 files changed

+327
-0
lines changed

Documentation/git-maintenance.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ task:
6969
* `prefetch`: hourly.
7070
* `loose-objects`: daily.
7171
* `incremental-repack`: daily.
72+
* `cache-local-objects`: weekly.
7273
--
7374
+
7475
`git maintenance register` will also disable foreground maintenance by
@@ -158,6 +159,13 @@ pack-refs::
158159
need to iterate across many references. See linkgit:git-pack-refs[1]
159160
for more information.
160161

162+
cache-local-objects::
163+
The `cache-local-objects` task only operates on Scalar or VFS for Git
164+
repositories (cloned with either `scalar clone` or `gvfs clone`) that
165+
have the `gvfs.sharedCache` configuration setting present. This task
166+
migrates pack files and loose objects from the repository's object
167+
directory into the shared volume cache.
168+
161169
OPTIONS
162170
-------
163171
--auto::

builtin/gc.c

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define USE_THE_REPOSITORY_VARIABLE
1414
#define DISABLE_SIGN_COMPARE_WARNINGS
1515

16+
#include "git-compat-util.h"
1617
#include "builtin.h"
1718
#include "abspath.h"
1819
#include "date.h"
@@ -44,6 +45,8 @@
4445
#include "hook.h"
4546
#include "setup.h"
4647
#include "trace2.h"
48+
#include "copy.h"
49+
#include "dir.h"
4750

4851
#define FAILED_RUN "failed to run %s"
4952

@@ -1360,6 +1363,186 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
13601363
return 0;
13611364
}
13621365

1366+
/*
 * Make 'dst' refer to the same content as 'src', preferably via a hard link.
 *
 * When link() fails the content is copied instead. Any failure other than
 * EXDEV (source and destination on different file systems) is reported as a
 * warning first, but the copy is still attempted. Dies if the copy fails.
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst)) {
		/* A cross-device link is expected; anything else deserves a note. */
		if (errno != EXDEV)
			warning_errno(_("failed to link '%s' to '%s'"), src, dst);

		/* Fall back to a copy; note that copy_file() is called as (dst, src). */
		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);
	}
}
1378+
1379+
/*
 * Move 'src' to 'dst', preferably with a single rename().
 *
 * When rename() fails the file is copied to 'dst' and 'src' is then
 * unlinked. Any rename failure other than EXDEV (source and destination on
 * different file systems) is reported as a warning first, but the
 * copy-and-delete fallback is still attempted. Dies if either the copy or
 * the removal of 'src' fails.
 */
static void rename_or_copy_or_die(const char *src, const char *dst)
{
	if (rename(src, dst)) {
		/* A cross-device move is expected; anything else deserves a note. */
		if (errno != EXDEV)
			warning_errno(_("failed to move '%s' to '%s'"), src, dst);

		/* Emulate the move: copy first ... */
		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);

		/* ... and drop the original only once the copy exists. */
		if (unlink(src))
			die_errno(_("failed to delete '%s'"), src);
	}
}
1394+
1395+
static void migrate_pack(const char *srcdir, const char *dstdir,
1396+
const char *pack_filename)
1397+
{
1398+
size_t basenamelen, srclen, dstlen;
1399+
struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT;
1400+
struct {
1401+
const char *ext;
1402+
unsigned move:1;
1403+
} files[] = {
1404+
{".pack", 0},
1405+
{".keep", 0},
1406+
{".rev", 0},
1407+
{".idx", 1}, /* The index file must be atomically moved last. */
1408+
};
1409+
1410+
trace2_region_enter("maintenance", "migrate_pack", the_repository);
1411+
1412+
basenamelen = strlen(pack_filename) - 5; /* .pack */
1413+
strbuf_addstr(&src, srcdir);
1414+
strbuf_addch(&src, '/');
1415+
strbuf_add(&src, pack_filename, basenamelen);
1416+
strbuf_addstr(&src, ".idx");
1417+
1418+
/* A pack without an index file is not yet ready to be migrated. */
1419+
if (!file_exists(src.buf))
1420+
goto cleanup;
1421+
1422+
strbuf_setlen(&src, src.len - 4 /* .idx */);
1423+
strbuf_addstr(&dst, dstdir);
1424+
strbuf_addch(&dst, '/');
1425+
strbuf_add(&dst, pack_filename, basenamelen);
1426+
1427+
srclen = src.len;
1428+
dstlen = dst.len;
1429+
1430+
/* Move or copy files from the source directory to the destination. */
1431+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1432+
strbuf_setlen(&src, srclen);
1433+
strbuf_addstr(&src, files[i].ext);
1434+
1435+
if (!file_exists(src.buf))
1436+
continue;
1437+
1438+
strbuf_setlen(&dst, dstlen);
1439+
strbuf_addstr(&dst, files[i].ext);
1440+
1441+
if (files[i].move)
1442+
rename_or_copy_or_die(src.buf, dst.buf);
1443+
else
1444+
link_or_copy_or_die(src.buf, dst.buf);
1445+
}
1446+
1447+
/*
1448+
* Now the pack and all associated files exist at the destination we can
1449+
* now clean up the files in the source directory.
1450+
*/
1451+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1452+
/* Files that were moved rather than copied have no clean up. */
1453+
if (files[i].move)
1454+
continue;
1455+
1456+
strbuf_setlen(&src, srclen);
1457+
strbuf_addstr(&src, files[i].ext);
1458+
1459+
/* Files that never existed in originally have no clean up.*/
1460+
if (!file_exists(src.buf))
1461+
continue;
1462+
1463+
if (unlink(src.buf))
1464+
warning_errno(_("failed to delete '%s'"), src.buf);
1465+
}
1466+
1467+
cleanup:
1468+
strbuf_release(&src);
1469+
strbuf_release(&dst);
1470+
1471+
trace2_region_leave("maintenance", "migrate_pack", the_repository);
1472+
}
1473+
1474+
/*
 * Callback invoked for each file found in the pack directory.
 *
 * 'full_path' (of length 'full_path_len') is the path of the file,
 * 'file_name' its last component, and 'data' the destination pack directory
 * as a C string.
 *
 * Only the actual ".pack" files are acted upon. The associated .idx, .keep
 * and .rev files are migrated in tandem with the pack file by
 * migrate_pack(), with the index file being moved last. The original
 * locations of the non-index files will only be deleted once all other
 * files have been copied/moved.
 */
static void move_pack_to_shared_cache(const char *full_path, size_t full_path_len,
				      const char *file_name, void *data)
{
	const char *cache_pack_dir = data;

	if (ends_with(file_name, ".pack")) {
		/* Directory part: drop "/<file_name>" from the end of the path. */
		size_t dir_len = full_path_len - strlen(file_name) - 1;
		char *local_pack_dir = xstrndup(full_path, dir_len);

		migrate_pack(local_pack_dir, cache_pack_dir, file_name);

		free(local_pack_dir);
	}
}
1495+
1496+
static int move_loose_object_to_shared_cache(const struct object_id *oid,
1497+
const char *path,
1498+
UNUSED void *data)
1499+
{
1500+
struct stat st;
1501+
struct strbuf dst = STRBUF_INIT;
1502+
char *hex = oid_to_hex(oid);
1503+
1504+
strbuf_addf(&dst, "%s/%.2s/", shared_object_dir, hex);
1505+
1506+
if (stat(dst.buf, &st)) {
1507+
if (mkdir(dst.buf, 0777))
1508+
die_errno(_("failed to create directory '%s'"), dst.buf);
1509+
} else if (!S_ISDIR(st.st_mode))
1510+
die(_("expected '%s' to be a directory"), dst.buf);
1511+
1512+
strbuf_addstr(&dst, hex+2);
1513+
rename_or_copy_or_die(path, dst.buf);
1514+
1515+
strbuf_release(&dst);
1516+
return 0;
1517+
}
1518+
1519+
/*
 * The 'cache-local-objects' maintenance task: migrate pack files and loose
 * objects from the repository's own object directory into the shared object
 * cache named by 'shared_object_dir'.
 *
 * Does nothing when no shared cache is configured, or when the shared cache
 * is the repository's object directory itself. Always returns 0; failures
 * while migrating are handled (fatally or as warnings) by the helpers.
 */
static int maintenance_task_cache_local_objs(UNUSED struct maintenance_run_opts *opts,
					     UNUSED struct gc_config *cfg)
{
	struct repository *r = the_repository;
	struct strbuf cache_pack_dir = STRBUF_INIT;
	const char *local_odb;

	/* This task is only applicable with a VFS/Scalar shared cache. */
	if (!shared_object_dir)
		return 0;

	local_odb = r->objects->odb->path;

	/* Only migrate when the dest differs from the local odb path. */
	if (fspathcmp(local_odb, shared_object_dir)) {
		strbuf_addf(&cache_pack_dir, "%s/pack", shared_object_dir);

		/* Packs first, then whatever loose objects remain locally. */
		for_each_file_in_pack_dir(local_odb, move_pack_to_shared_cache,
					  cache_pack_dir.buf);

		for_each_loose_object(move_loose_object_to_shared_cache, NULL,
				      FOR_EACH_OBJECT_LOCAL_ONLY);
	}

	strbuf_release(&cache_pack_dir);
	return 0;
}
1545+
13631546
typedef int maintenance_task_fn(struct maintenance_run_opts *opts,
13641547
struct gc_config *cfg);
13651548

@@ -1389,6 +1572,7 @@ enum maintenance_task_label {
13891572
TASK_GC,
13901573
TASK_COMMIT_GRAPH,
13911574
TASK_PACK_REFS,
1575+
TASK_CACHE_LOCAL_OBJS,
13921576

13931577
/* Leave as final value */
13941578
TASK__COUNT
@@ -1425,6 +1609,10 @@ static struct maintenance_task tasks[] = {
14251609
maintenance_task_pack_refs,
14261610
pack_refs_condition,
14271611
},
1612+
[TASK_CACHE_LOCAL_OBJS] = {
1613+
"cache-local-objects",
1614+
maintenance_task_cache_local_objs,
1615+
},
14281616
};
14291617

14301618
static int compare_tasks_by_selection(const void *a_, const void *b_)
@@ -1519,6 +1707,8 @@ static void initialize_maintenance_strategy(void)
15191707
tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY;
15201708
tasks[TASK_PACK_REFS].enabled = 1;
15211709
tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY;
1710+
tasks[TASK_CACHE_LOCAL_OBJS].enabled = 1;
1711+
tasks[TASK_CACHE_LOCAL_OBJS].schedule = SCHEDULE_WEEKLY;
15221712
}
15231713
}
15241714

t/t7900-maintenance.sh

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,25 @@ test_systemd_analyze_verify () {
3131
fi
3232
}
3333

34+
# Import five tiny blobs (with contents 1..5) through git fast-import, with
# fastimport.unpackLimit set to 0 (presumably so that the imported objects
# are kept as a packfile rather than unpacked into loose objects).
test_import_packfile () {
	printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 |
	git -c fastimport.unpackLimit=0 fast-import
}
38+
39+
# List regular files below .git/objects/pack, one path per line.
#
# Without arguments every file is listed. Otherwise each argument is a
# `find -name` pattern (e.g. "*.pack") and the files matching each pattern
# are listed in argument order.
test_get_packdir_files() {
	if [ "$#" -eq 0 ]; then
		find .git/objects/pack -type f
	else
		for arg in "$@"; do
			# Quote the pattern: unquoted, the shell would expand it
			# against the current directory before find sees it.
			find .git/objects/pack -type f -name "$arg"
		done
	fi
}
48+
49+
# List the loose object files of the repository in the current directory:
# regular files that live in a two-character subdirectory of .git/objects.
test_get_loose_object_files () {
	find .git/objects -path '.git/objects/??/*' -type f
}
52+
3453
test_expect_success 'help text' '
3554
test_expect_code 129 git maintenance -h >actual &&
3655
test_grep "usage: git maintenance <subcommand>" actual &&
@@ -1025,4 +1044,114 @@ test_expect_success 'maintenance aborts with existing lock file' '
10251044
test_grep "Another scheduled git-maintenance(1) process seems to be running" err
10261045
'
10271046

1047+
# Without gvfs.sharedCache configured the task must leave every pack file and
# loose object exactly where it was.
test_expect_success 'cache-local-objects task with no shared cache no op' '
	test_when_finished "rm -rf repo" &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
			>files.txt &&
		test_get_loose_object_files >>files.txt &&

		# Guard against a vacuous pass: there must be files to check.
		test -s files.txt &&

		git maintenance run &&
		while IFS= read -r f; do
			test_path_exists "$f" || exit 1
		done <files.txt
	)
'
1069+
1070+
# When gvfs.sharedCache points at the repository object directory itself the
# task must leave every pack file and loose object exactly where it was.
test_expect_success 'cache-local-objects task cache path same as local odb no op' '
	test_when_finished "rm -rf repo" &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set gvfs.sharedcache .git/objects &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
			>files.txt &&
		test_get_loose_object_files >>files.txt &&

		# Guard against a vacuous pass: there must be files to check.
		test -s files.txt &&

		git maintenance run &&
		while IFS= read -r f; do
			test_path_exists "$f" || exit 1
		done <files.txt
	)
'
1093+
1094+
# A pack that has no .rev or .keep companion files must still be migrated:
# the .pack/.idx files and the loose objects disappear from the repository
# and show up under the shared cache.
test_expect_success 'cache-local-objects task no .rev or .keep' '
	test_when_finished "rm -rf repo cache" &&
	mkdir -p cache/pack &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set gvfs.sharedcache ../cache &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" >src.txt &&
		test_get_loose_object_files >>src.txt &&

		# Guard against a vacuous pass: there must be files to check.
		test -s src.txt &&

		rm -f .git/objects/pack/*.rev .git/objects/pack/*.keep &&

		# Map each source path to its expected location in the cache.
		sed "s|^\\.git/objects/|../cache/|" src.txt >dst.txt &&

		git maintenance run &&
		while IFS= read -r f; do
			test_path_is_missing "$f" || exit 1
		done <src.txt &&

		while IFS= read -r f; do
			test_path_exists "$f" || exit 1
		done <dst.txt
	)
'
1125+
1126+
# With a shared cache configured, all pack files (including any .keep/.rev
# companions) and loose objects disappear from the repository and show up
# under the shared cache.
test_expect_success 'cache-local-objects task success' '
	test_when_finished "rm -rf repo cache" &&
	mkdir -p cache/pack &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set gvfs.sharedcache ../cache &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
			>src.txt &&
		test_get_loose_object_files >>src.txt &&

		# Guard against a vacuous pass: there must be files to check.
		test -s src.txt &&

		# Map each source path to its expected location in the cache.
		sed "s|^\\.git/objects/|../cache/|" src.txt >dst.txt &&

		git maintenance run &&
		while IFS= read -r f; do
			test_path_is_missing "$f" || exit 1
		done <src.txt &&

		while IFS= read -r f; do
			test_path_exists "$f" || exit 1
		done <dst.txt
	)
'
1156+
10281157
test_done

0 commit comments

Comments
 (0)