Skip to content

Commit b5b340d

Browse files
authored
maintenance: add new cache-local-objects maintenance task (#720)
Introduce a new maintenance task, `cache-local-objects`, that operates on Scalar or VFS for Git repositories with a per-volume, shared object cache (specified by `gvfs.sharedCache`) to migrate packfiles and loose objects from the repository object directory to the shared cache. Older versions of `microsoft/git` incorrectly placed packfiles in the repository object directory instead of the shared cache; this task will help clean up existing clones impacted by that issue. Fixes #716
2 parents 5b0f528 + 03e984b commit b5b340d

File tree

4 files changed

+336
-5
lines changed

4 files changed

+336
-5
lines changed

Documentation/git-maintenance.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ task:
6969
* `prefetch`: hourly.
7070
* `loose-objects`: daily.
7171
* `incremental-repack`: daily.
72+
* `cache-local-objects`: weekly.
7273
--
7374
+
7475
`git maintenance register` will also disable foreground maintenance by
@@ -158,6 +159,13 @@ pack-refs::
158159
need to iterate across many references. See linkgit:git-pack-refs[1]
159160
for more information.
160161

162+
cache-local-objects::
163+
The `cache-local-objects` task only operates on Scalar or VFS for Git
164+
repositories (cloned with either `scalar clone` or `gvfs clone`) that
165+
have the `gvfs.sharedCache` configuration setting present. This task
166+
migrates pack files and loose objects from the repository's object
167+
directory into the shared volume cache.
168+
161169
OPTIONS
162170
-------
163171
--auto::

builtin/gc.c

Lines changed: 197 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* Copyright (c) 2006 Shawn O. Pearce
1111
*/
1212
#define USE_THE_REPOSITORY_VARIABLE
13+
#include "git-compat-util.h"
1314
#include "builtin.h"
1415
#include "abspath.h"
1516
#include "date.h"
@@ -41,6 +42,8 @@
4142
#include "hook.h"
4243
#include "setup.h"
4344
#include "trace2.h"
45+
#include "copy.h"
46+
#include "dir.h"
4447

4548
#define FAILED_RUN "failed to run %s"
4649

@@ -1141,17 +1144,19 @@ static int write_loose_object_to_stdin(const struct object_id *oid,
11411144
return ++(d->count) > d->batch_size;
11421145
}
11431146

1144-
static const char *object_dir = NULL;
1147+
static const char *shared_object_dir = NULL;
11451148

11461149
static int pack_loose(struct maintenance_run_opts *opts)
11471150
{
11481151
struct repository *r = the_repository;
11491152
int result = 0;
11501153
struct write_loose_object_data data;
11511154
struct child_process pack_proc = CHILD_PROCESS_INIT;
1155+
const char *object_dir = r->objects->odb->path;
11521156

1153-
if (!object_dir)
1154-
object_dir = r->objects->odb->path;
1157+
/* If set, use the shared object directory. */
1158+
if (shared_object_dir)
1159+
object_dir = shared_object_dir;
11551160

11561161
/*
11571162
* Do not start pack-objects process
@@ -1345,6 +1350,186 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
13451350
return 0;
13461351
}
13471352

1353+
/*
 * Make `dst` available with the contents of `src`, preferring a hard link.
 *
 * If link(2) fails for any reason we fall back to copy_file() (called with
 * mode 0444). A warning is emitted for every link failure except EXDEV,
 * which only tells us that the two paths live on different file systems.
 * Dies if the fallback copy fails too.
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst)) {
		/* EXDEV is the expected cross-device case; stay quiet for it. */
		if (errno != EXDEV)
			warning_errno(_("failed to link '%s' to '%s'"), src, dst);

		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);
	}
}
1365+
1366+
/*
 * Move `src` to `dst`, preferring a plain rename(2).
 *
 * If the rename fails we fall back to copy_file() (called with mode 0444)
 * followed by unlinking `src`. A warning is emitted for every rename
 * failure except EXDEV, which only tells us that the two paths live on
 * different file systems. Dies if either the copy or the unlink fails.
 */
static void rename_or_copy_or_die(const char *src, const char *dst)
{
	if (rename(src, dst)) {
		/* EXDEV is the expected cross-device case; stay quiet for it. */
		if (errno != EXDEV)
			warning_errno(_("failed to move '%s' to '%s'"), src, dst);

		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);

		/* The copy succeeded, so the original is no longer needed. */
		if (unlink(src))
			die_errno(_("failed to delete '%s'"), src);
	}
}
1381+
1382+
static void migrate_pack(const char *srcdir, const char *dstdir,
1383+
const char *pack_filename)
1384+
{
1385+
size_t basenamelen, srclen, dstlen;
1386+
struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT;
1387+
struct {
1388+
const char *ext;
1389+
unsigned move:1;
1390+
} files[] = {
1391+
{".pack", 0},
1392+
{".keep", 0},
1393+
{".rev", 0},
1394+
{".idx", 1}, /* The index file must be atomically moved last. */
1395+
};
1396+
1397+
trace2_region_enter("maintenance", "migrate_pack", the_repository);
1398+
1399+
basenamelen = strlen(pack_filename) - 5; /* .pack */
1400+
strbuf_addstr(&src, srcdir);
1401+
strbuf_addch(&src, '/');
1402+
strbuf_add(&src, pack_filename, basenamelen);
1403+
strbuf_addstr(&src, ".idx");
1404+
1405+
/* A pack without an index file is not yet ready to be migrated. */
1406+
if (!file_exists(src.buf))
1407+
goto cleanup;
1408+
1409+
strbuf_setlen(&src, src.len - 4 /* .idx */);
1410+
strbuf_addstr(&dst, dstdir);
1411+
strbuf_addch(&dst, '/');
1412+
strbuf_add(&dst, pack_filename, basenamelen);
1413+
1414+
srclen = src.len;
1415+
dstlen = dst.len;
1416+
1417+
/* Move or copy files from the source directory to the destination. */
1418+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1419+
strbuf_setlen(&src, srclen);
1420+
strbuf_addstr(&src, files[i].ext);
1421+
1422+
if (!file_exists(src.buf))
1423+
continue;
1424+
1425+
strbuf_setlen(&dst, dstlen);
1426+
strbuf_addstr(&dst, files[i].ext);
1427+
1428+
if (files[i].move)
1429+
rename_or_copy_or_die(src.buf, dst.buf);
1430+
else
1431+
link_or_copy_or_die(src.buf, dst.buf);
1432+
}
1433+
1434+
/*
1435+
* Now the pack and all associated files exist at the destination we can
1436+
* now clean up the files in the source directory.
1437+
*/
1438+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1439+
/* Files that were moved rather than copied have no clean up. */
1440+
if (files[i].move)
1441+
continue;
1442+
1443+
strbuf_setlen(&src, srclen);
1444+
strbuf_addstr(&src, files[i].ext);
1445+
1446+
/* Files that never existed in originally have no clean up.*/
1447+
if (!file_exists(src.buf))
1448+
continue;
1449+
1450+
if (unlink(src.buf))
1451+
warning_errno(_("failed to delete '%s'"), src.buf);
1452+
}
1453+
1454+
cleanup:
1455+
strbuf_release(&src);
1456+
strbuf_release(&dst);
1457+
1458+
trace2_region_leave("maintenance", "migrate_pack", the_repository);
1459+
}
1460+
1461+
/*
 * Per-file callback (passed to for_each_file_in_pack_dir()) that migrates
 * one packfile into the directory given by `data`.
 *
 * `data` is the destination pack directory as a NUL-terminated string.
 * Only names ending in ".pack" are acted upon: the associated .idx, .keep
 * and .rev files are handled together with the pack by migrate_pack(),
 * which moves the index last and only deletes the originals of the other
 * files once everything has been copied/moved.
 */
static void move_pack_to_shared_cache(const char *full_path, size_t full_path_len,
				      const char *file_name, void *data)
{
	const char *shared_pack_dir = data;
	size_t dirlen;
	char *local_pack_dir;

	if (!ends_with(file_name, ".pack"))
		return;

	/*
	 * The containing directory is the full path minus the file name and
	 * the one character in front of it (presumably the path separator).
	 */
	dirlen = full_path_len - strlen(file_name) - 1;
	local_pack_dir = xstrndup(full_path, dirlen);

	migrate_pack(local_pack_dir, shared_pack_dir, file_name);

	free(local_pack_dir);
}
1482+
1483+
static int move_loose_object_to_shared_cache(const struct object_id *oid,
1484+
const char *path,
1485+
UNUSED void *data)
1486+
{
1487+
struct stat st;
1488+
struct strbuf dst = STRBUF_INIT;
1489+
char *hex = oid_to_hex(oid);
1490+
1491+
strbuf_addf(&dst, "%s/%.2s/", shared_object_dir, hex);
1492+
1493+
if (stat(dst.buf, &st)) {
1494+
if (mkdir(dst.buf, 0777))
1495+
die_errno(_("failed to create directory '%s'"), dst.buf);
1496+
} else if (!S_ISDIR(st.st_mode))
1497+
die(_("expected '%s' to be a directory"), dst.buf);
1498+
1499+
strbuf_addstr(&dst, hex+2);
1500+
rename_or_copy_or_die(path, dst.buf);
1501+
1502+
strbuf_release(&dst);
1503+
return 0;
1504+
}
1505+
1506+
/*
 * The `cache-local-objects` maintenance task: migrate packfiles and loose
 * objects from the repository's own object directory into the shared
 * object directory.
 *
 * Does nothing when no shared object directory has been set, or when it
 * compares equal (per fspathcmp()) to the repository's object directory.
 * Always returns 0; the helpers it calls die on fatal errors.
 */
static int maintenance_task_cache_local_objs(UNUSED struct maintenance_run_opts *opts,
					     UNUSED struct gc_config *cfg)
{
	struct strbuf shared_pack_dir = STRBUF_INIT;
	const char *local_odb;

	/* This task is only applicable with a VFS/Scalar shared cache. */
	if (!shared_object_dir)
		return 0;

	local_odb = the_repository->objects->odb->path;

	/* Only migrate when the local odb is not itself the shared cache. */
	if (fspathcmp(local_odb, shared_object_dir)) {
		strbuf_addf(&shared_pack_dir, "%s/pack", shared_object_dir);

		for_each_file_in_pack_dir(local_odb, move_pack_to_shared_cache,
					  shared_pack_dir.buf);

		for_each_loose_object(move_loose_object_to_shared_cache, NULL,
				      FOR_EACH_OBJECT_LOCAL_ONLY);
	}

	strbuf_release(&shared_pack_dir);
	return 0;
}
1532+
13481533
typedef int maintenance_task_fn(struct maintenance_run_opts *opts,
13491534
struct gc_config *cfg);
13501535

@@ -1374,6 +1559,7 @@ enum maintenance_task_label {
13741559
TASK_GC,
13751560
TASK_COMMIT_GRAPH,
13761561
TASK_PACK_REFS,
1562+
TASK_CACHE_LOCAL_OBJS,
13771563

13781564
/* Leave as final value */
13791565
TASK__COUNT
@@ -1410,6 +1596,10 @@ static struct maintenance_task tasks[] = {
14101596
maintenance_task_pack_refs,
14111597
pack_refs_condition,
14121598
},
1599+
[TASK_CACHE_LOCAL_OBJS] = {
1600+
"cache-local-objects",
1601+
maintenance_task_cache_local_objs,
1602+
},
14131603
};
14141604

14151605
static int compare_tasks_by_selection(const void *a_, const void *b_)
@@ -1504,6 +1694,8 @@ static void initialize_maintenance_strategy(void)
15041694
tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY;
15051695
tasks[TASK_PACK_REFS].enabled = 1;
15061696
tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY;
1697+
tasks[TASK_CACHE_LOCAL_OBJS].enabled = 1;
1698+
tasks[TASK_CACHE_LOCAL_OBJS].schedule = SCHEDULE_WEEKLY;
15071699
}
15081700
}
15091701

@@ -1620,8 +1812,8 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
16201812
*/
16211813
if (!git_config_get_value("gvfs.sharedcache", &tmp_obj_dir) &&
16221814
tmp_obj_dir) {
1623-
object_dir = xstrdup(tmp_obj_dir);
1624-
setenv(DB_ENVIRONMENT, object_dir, 1);
1815+
shared_object_dir = xstrdup(tmp_obj_dir);
1816+
setenv(DB_ENVIRONMENT, shared_object_dir, 1);
16251817
}
16261818

16271819
ret = maintenance_run_tasks(&opts, &cfg);

scalar.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,7 @@ static int cmd_run(int argc, const char **argv)
11631163
{ "fetch", "prefetch" },
11641164
{ "loose-objects", "loose-objects" },
11651165
{ "pack-files", "incremental-repack" },
1166+
{ "cache-local-objects", "cache-local-objects" },
11661167
{ NULL, NULL }
11671168
};
11681169
struct strbuf buf = STRBUF_INIT;

0 commit comments

Comments
 (0)