Skip to content

Avoid spawning gzip in git archive #145

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Documentation/git-archive.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ tar.<format>.command::
format is given.
+
The "tar.gz" and "tgz" formats are defined automatically and default to
`:internal-gzip:`, which triggers in-process gzip compression. You may
override them with custom commands, e.g. `gzip -cn` or `pigz -cn`.

tar.<format>.remote::
If true, enable `<format>` for use by remote clients via
Expand Down
115 changes: 100 additions & 15 deletions archive-tar.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ static unsigned long offset;

static int tar_umask = 002;

static gzFile gzip;

static int write_tar_filter_archive(const struct archiver *ar,
struct archiver_args *args);

Expand All @@ -38,11 +40,21 @@ static int write_tar_filter_archive(const struct archiver *ar,
#define USTAR_MAX_MTIME 077777777777ULL
#endif

static int out_fd = 1, nr_threads = 0;

/*
 * Emit one complete BLOCKSIZE-byte block, dying on failure.  The block is
 * fed to the in-process gzip stream when one is open, and written straight
 * to out_fd otherwise.
 */
static void write_block_or_die(const char *block)
{
	if (gzip) {
		if (gzwrite(gzip, block, (unsigned) BLOCKSIZE) != BLOCKSIZE)
			die(_("gzwrite failed"));
		return;
	}

	write_or_die(out_fd, block, BLOCKSIZE);
}

/*
 * Flush the staging buffer once it holds a full BLOCKSIZE bytes and reset
 * the fill offset; do nothing while the buffer is only partially filled.
 */
static void write_if_needed(void)
{
	if (offset != BLOCKSIZE)
		return;

	/*
	 * Emit the block exactly once, through write_block_or_die(), so that
	 * it follows the active output path (out_fd or the gzip stream)
	 * instead of additionally being written raw to descriptor 1.
	 */
	write_block_or_die(block);
	offset = 0;
}
Expand All @@ -66,7 +78,7 @@ static void do_write_blocked(const void *data, unsigned long size)
write_if_needed();
}
while (size >= BLOCKSIZE) {
write_or_die(1, buf, BLOCKSIZE);
write_block_or_die(buf);
size -= BLOCKSIZE;
buf += BLOCKSIZE;
}
Expand Down Expand Up @@ -101,10 +113,10 @@ static void write_trailer(void)
{
int tail = BLOCKSIZE - offset;
memset(block + offset, 0, tail);
write_or_die(1, block, BLOCKSIZE);
write_block_or_die(block);
if (tail < 2 * RECORDSIZE) {
memset(block, 0, offset);
write_or_die(1, block, BLOCKSIZE);
write_block_or_die(block);
}
}

Expand Down Expand Up @@ -415,6 +427,13 @@ static int tar_filter_config(const char *var, const char *value, void *data)

static int git_tar_config(const char *var, const char *value, void *cb)
{
if (!strcmp(var, "pack.threads")) {
nr_threads = git_config_int(var, value);
if (nr_threads < 0)
nr_threads = 1; /* fall back to single-threaded */
return 0;
}

if (!strcmp(var, "tar.umask")) {
if (value && !strcmp(value, "user")) {
tar_umask = umask(0);
Expand All @@ -440,6 +459,31 @@ static int write_tar_archive(const struct archiver *ar,
return err;
}

static int internal_gzip(int in, int out, void *data)
{
gzip = gzdopen(1, "wb");
if (!gzip)
return error(_("gzdopen failed"));
if (gzsetparams(gzip, *(int *)data, Z_DEFAULT_STRATEGY) != Z_OK)
return error(_("unable to set compression level"));

for (;;) {
char buf[BLOCKSIZE];
ssize_t read = xread(in, buf, sizeof(buf));
if (read < 0)
die_errno(_("read failed"));
if (read == 0)
break;
if (gzwrite(gzip, buf, read) != read)
die(_("gzwrite failed"));
}

close(in);
if (gzclose(gzip) != Z_OK)
return error(_("gzclose failed"));
return 0;
}

static int write_tar_filter_archive(const struct archiver *ar,
struct archiver_args *args)
{
Expand All @@ -451,6 +495,28 @@ static int write_tar_filter_archive(const struct archiver *ar,
if (!ar->data)
BUG("tar-filter archiver called with no filter defined");

if (!strcmp(ar->data, ":internal-gzip:") &&
/* use separate thread? */
(nr_threads > 1 || (nr_threads == 0 && online_cpus() > 1))) {
struct async filter = {
.proc = internal_gzip,
.data = &args->compression_level,
.in = -1
};

if (start_async(&filter))
return error(_("unable to fork off internal gzip"));
out_fd = filter.in;

r = write_tar_archive(ar, args);

close(out_fd);
if (finish_async(&filter))
return error(_("error in internal gzip"));

return r;
}

strbuf_addstr(&cmd, ar->data);
if (args->compression_level >= 0)
strbuf_addf(&cmd, " -%d", args->compression_level);
Expand All @@ -461,18 +527,37 @@ static int write_tar_filter_archive(const struct archiver *ar,
filter.use_shell = 1;
filter.in = -1;

if (start_command(&filter) < 0)
die_errno(_("unable to start '%s' filter"), argv[0]);
close(1);
if (dup2(filter.in, 1) < 0)
die_errno(_("unable to redirect descriptor"));
close(filter.in);
if (!strcmp(":internal-gzip:", ar->data)) {
gzip = gzdopen(fileno(stdout), "wb");
if (!gzip)
die(_("Could not gzdopen stdout"));
if (args->compression_level >= 0 &&
gzsetparams(gzip, args->compression_level,
Z_DEFAULT_STRATEGY) != Z_OK)
die(_("unable to set compression level %d"),
args->compression_level);
} else {
if (start_command(&filter) < 0)
die_errno(_("unable to start '%s' filter"), argv[0]);
close(1);
if (dup2(filter.in, 1) < 0)
die_errno(_("unable to redirect descriptor"));
close(filter.in);
}

r = write_tar_archive(ar, args);

close(1);
if (finish_command(&filter) != 0)
die(_("'%s' filter reported error"), argv[0]);
if (gzip) {
int ret = gzclose(gzip);
if (ret == Z_ERRNO)
die_errno(_("gzclose failed"));
else if (ret != Z_OK)
die(_("gzclose failed (%d)"), ret);
} else {
close(1);
if (finish_command(&filter) != 0)
die(_("'%s' filter reported error"), argv[0]);
}

strbuf_release(&cmd);
return r;
Expand All @@ -489,9 +574,9 @@ void init_tar_archiver(void)
int i;
register_archiver(&tar_archiver);

tar_filter_config("tar.tgz.command", "gzip -cn", NULL);
tar_filter_config("tar.tgz.command", ":internal-gzip:", NULL);
tar_filter_config("tar.tgz.remote", "true", NULL);
tar_filter_config("tar.tar.gz.command", "gzip -cn", NULL);
tar_filter_config("tar.tar.gz.command", ":internal-gzip:", NULL);
tar_filter_config("tar.tar.gz.remote", "true", NULL);
git_config(git_tar_config, NULL);
for (i = 0; i < nr_tar_filters; i++) {
Expand Down
14 changes: 14 additions & 0 deletions t/perf/p5005-archive-tgz.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/sh

test_description='Test archive --format=tgz performance'

. ./perf-lib.sh

test_perf_default_repo

# Time exporting HEAD in the tgz format; the archive itself is discarded.
test_perf 'archive --format=tgz' '
git archive --format=tgz HEAD >/dev/null
'

test_done

19 changes: 13 additions & 6 deletions t/t5000-tar-tree.sh
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ test_expect_success 'git archive with --output, override inferred format' '

# A .tgz --output name combined with --remote must yield a gzip stream that
# decompresses to the reference tar archive.
test_expect_success GZIP 'git archive with --output and --remote creates .tgz' '
	git archive --output=d5.tgz --remote=. HEAD &&
	gzip -d -c <d5.tgz >d5.tar &&
	test_cmp_bin b.tar d5.tar
'
Expand Down Expand Up @@ -298,36 +299,42 @@ test_expect_success 'only enabled filters are available remotely' '
test_cmp_bin remote.bar config.bar
'

test_expect_success GZIP 'git archive --format=tgz' '
test_expect_success 'git archive --format=tgz' '
git archive --format=tgz HEAD >j.tgz
'

test_expect_success GZIP 'git archive --format=tar.gz' '
test_expect_success 'git archive --format=tar.gz' '
git archive --format=tar.gz HEAD >j1.tar.gz &&
test_cmp_bin j.tgz j1.tar.gz
'

test_expect_success GZIP 'infer tgz from .tgz filename' '
test_expect_success 'infer tgz from .tgz filename' '
git archive --output=j2.tgz HEAD &&
test_cmp_bin j.tgz j2.tgz
'

test_expect_success GZIP 'infer tgz from .tar.gz filename' '
test_expect_success 'infer tgz from .tar.gz filename' '
git archive --output=j3.tar.gz HEAD &&
test_cmp_bin j.tgz j3.tar.gz
'

# Setting the filter command explicitly to the built-in compressor must give
# the same bytes as the default configuration.  The key is
# tar.<format>.command; an archive.* key would not be read as a tar filter.
test_expect_success 'use `tar.tgz.command=:internal-gzip:` explicitly' '
	git -c tar.tgz.command=:internal-gzip: archive --output=j4.tgz \
		HEAD &&
	test_cmp_bin j.tgz j4.tgz
'

test_expect_success GZIP 'extract tgz file' '
gzip -d -c <j.tgz >j.tar &&
test_cmp_bin b.tar j.tar
'

test_expect_success GZIP 'remote tar.gz is allowed by default' '
test_expect_success 'remote tar.gz is allowed by default' '
git archive --remote=. --format=tar.gz HEAD >remote.tar.gz &&
test_cmp_bin j.tgz remote.tar.gz
'

test_expect_success GZIP 'remote tar.gz can be disabled' '
test_expect_success 'remote tar.gz can be disabled' '
git config tar.tar.gz.remote false &&
test_must_fail git archive --remote=. --format=tar.gz HEAD \
>remote.tar.gz
Expand Down