Skip to content

Teach Git to handle huge files in smudge/clean #3487

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 29, 2021
2 changes: 1 addition & 1 deletion convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf,

struct filter_params {
const char *src;
unsigned long size;
size_t size;
int fd;
const char *cmd;
const char *path;
Expand Down
6 changes: 3 additions & 3 deletions delta.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,15 @@ static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
const unsigned char *top)
{
const unsigned char *data = *datap;
unsigned long cmd, size = 0;
size_t cmd, size = 0;
int i = 0;
do {
cmd = *data++;
size |= (cmd & 0x7f) << i;
size |= st_left_shift(cmd & 0x7f, i);
i += 7;
} while (cmd & 0x80 && data < top);
*datap = data;
return size;
return cast_size_t_to_ulong(size);
}

#endif
8 changes: 5 additions & 3 deletions entry.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,13 @@ static int create_file(const char *path, unsigned int mode)
return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
}

void *read_blob_entry(const struct cache_entry *ce, unsigned long *size)
void *read_blob_entry(const struct cache_entry *ce, size_t *size)
{
enum object_type type;
void *blob_data = read_object_file(&ce->oid, &type, size);
unsigned long ul;
void *blob_data = read_object_file(&ce->oid, &type, &ul);

*size = ul;
if (blob_data) {
if (type == OBJ_BLOB)
return blob_data;
Expand Down Expand Up @@ -270,7 +272,7 @@ static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca
int fd, ret, fstat_done = 0;
char *new_blob;
struct strbuf buf = STRBUF_INIT;
unsigned long size;
size_t size;
ssize_t wrote;
size_t newsize = 0;
struct stat st;
Expand Down
2 changes: 1 addition & 1 deletion entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts);
*/
void unlink_entry(const struct cache_entry *ce);

void *read_blob_entry(const struct cache_entry *ce, unsigned long *size);
void *read_blob_entry(const struct cache_entry *ce, size_t *size);
int fstat_checkout_output(int fd, const struct checkout *state, struct stat *st);
void update_ce_after_write(const struct checkout *state, struct cache_entry *ce,
struct stat *st);
Expand Down
25 changes: 25 additions & 0 deletions git-compat-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,14 @@
#define unsigned_mult_overflows(a, b) \
((a) && (b) > maximum_unsigned_value_of_type(a) / (a))

/*
 * Returns true if the left shift of "a" by "shift" bits will
 * overflow. The type of "a" must be unsigned.
 *
 * A shift count of bitsizeof(a) or more pushes every bit out of the
 * type, so it overflows for any nonzero "a". That case is decided
 * up front, which also keeps the right shift below from ever being
 * evaluated with an out-of-range count.
 *
 * Note that "shift" appears more than once in the expansion, so it
 * must not have side effects.
 */
#define unsigned_left_shift_overflows(a, shift) \
	((shift) >= bitsizeof(a) ? (a) != 0 : \
	 (a) > maximum_unsigned_value_of_type(a) >> (shift))

#ifdef __GNUC__
#define TYPEOF(x) (__typeof__(x))
#else
Expand Down Expand Up @@ -859,6 +867,23 @@ static inline size_t st_sub(size_t a, size_t b)
return a - b;
}

/*
 * Shift "a" left by "shift" bits, calling die() when set bits would
 * be shifted out of a size_t.
 *
 * Shifting by bitsizeof(size_t) or more is undefined behavior in C,
 * so that case is settled before any shift is attempted: a zero
 * value yields zero, any other value is reported as an overflow.
 */
static inline size_t st_left_shift(size_t a, unsigned shift)
{
	const int too_wide = shift >= bitsizeof(a);

	if (too_wide ? a != 0 : unsigned_left_shift_overflows(a, shift))
		die("size_t overflow: %"PRIuMAX" << %u",
		    (uintmax_t)a, shift);
	return too_wide ? 0 : a << shift;
}

/*
 * Narrow a size_t to unsigned long. If the value does not survive
 * the conversion unchanged, die() is called with both the original
 * and the truncated value.
 */
static inline unsigned long cast_size_t_to_ulong(size_t a)
{
	const unsigned long narrowed = (unsigned long)a;

	if (narrowed != a)
		die("object too large to read on this platform: %"
		    PRIuMAX" is cut off to %lu",
		    (uintmax_t)a, narrowed);
	return narrowed;
}

#ifdef HAVE_ALLOCA_H
# include <alloca.h>
# define xalloca(size) (alloca(size))
Expand Down
6 changes: 3 additions & 3 deletions object-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,7 @@ static int parse_loose_header_extended(const char *hdr, struct object_info *oi,
unsigned int flags)
{
const char *type_buf = hdr;
unsigned long size;
size_t size;
int type, type_len = 0;

/*
Expand Down Expand Up @@ -1388,12 +1388,12 @@ static int parse_loose_header_extended(const char *hdr, struct object_info *oi,
if (c > 9)
break;
hdr++;
size = size * 10 + c;
size = st_add(st_mult(size, 10), c);
}
}

if (oi->sizep)
*oi->sizep = size;
*oi->sizep = cast_size_t_to_ulong(size);

/*
* The length must be followed by a zero byte
Expand Down
6 changes: 3 additions & 3 deletions packfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1059,7 +1059,7 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
unsigned long len, enum object_type *type, unsigned long *sizep)
{
unsigned shift;
unsigned long size, c;
size_t size, c;
unsigned long used = 0;

c = buf[used++];
Expand All @@ -1073,10 +1073,10 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
break;
}
c = buf[used++];
size += (c & 0x7f) << shift;
size = st_add(size, st_left_shift(c & 0x7f, shift));
shift += 7;
}
*sizep = size;
*sizep = cast_size_t_to_ulong(size);
return used;
}

Expand Down
2 changes: 1 addition & 1 deletion parallel-checkout.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd,
struct stream_filter *filter;
struct strbuf buf = STRBUF_INIT;
char *blob;
unsigned long size;
size_t size;
ssize_t wrote;

/* Sanity check */
Expand Down
21 changes: 17 additions & 4 deletions t/helper/test-genzeros.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,31 @@

int cmd__genzeros(int argc, const char **argv)
{
long count;
/* static, so that it is NUL-initialized */
static const char zeros[256 * 1024];
intmax_t count;
ssize_t n;

if (argc > 2) {
fprintf(stderr, "usage: %s [<count>]\n", argv[0]);
return 1;
}

count = argc > 1 ? strtol(argv[1], NULL, 0) : -1L;
count = argc > 1 ? strtoimax(argv[1], NULL, 0) : -1;

while (count < 0 || count--) {
if (putchar(0) == EOF)
/* Writing out individual NUL bytes is slow... */
while (count < 0)
if (write(1, zeros, ARRAY_SIZE(zeros)) < 0)
return -1;

while (count > 0) {
n = write(1, zeros, count < ARRAY_SIZE(zeros) ?
count : ARRAY_SIZE(zeros));

if (n < 0)
return -1;

count -= n;
}

return 0;
Expand Down
25 changes: 25 additions & 0 deletions t/t1051-large-conversion.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,29 @@ test_expect_success 'ident converts on output' '
test_cmp small.clean large.clean
'

# This smudge filter prepends 5GB of zeros to the file it checks out. This
# ensures that smudging doesn't mangle large files on 64-bit Windows.
#
# The filter runs "test-tool genzeros" for 5GB and then "cat", so the
# checked-out file is the zeros followed by the original content. The test
# passes when the resulting file is at least 5GB in size.
#
# Prerequisites: SIZE_T_IS_64BIT together with !LONG_IS_64BIT restricts the
# test to builds where size_t is 8 bytes but long is not; EXPENSIVE is
# presumably the opt-in for long-running tests (its definition is not shown
# here).
test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
'files over 4GB convert on output' '
test_commit test small "a small file" &&
test_config filter.makelarge.smudge \
"test-tool genzeros $((5*1024*1024*1024)) && cat" &&
echo "small filter=makelarge" >.gitattributes &&
rm small &&
git checkout -- small &&
size=$(test_file_size small) &&
test "$size" -ge $((5 * 1024 * 1024 * 1024))
'

# This clean filter writes down the size of input it receives. By checking against
# the actual size, we ensure that cleaning doesn't mangle large files on 64-bit Windows.
#
# The input is a 5GB file of zeros produced by "test-tool genzeros". The
# filter is "wc -c >big.size", so the byte count it saw lands in big.size and
# is compared with the on-disk size of "big" after "git add".
#
# Prerequisites: SIZE_T_IS_64BIT together with !LONG_IS_64BIT restricts the
# test to builds where size_t is 8 bytes but long is not; EXPENSIVE is
# presumably the opt-in for long-running tests (its definition is not shown
# here).
test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
'files over 4GB convert on input' '
test-tool genzeros $((5*1024*1024*1024)) >big &&
test_config filter.checklarge.clean "wc -c >big.size" &&
echo "big filter=checklarge" >.gitattributes &&
git add big &&
test $(test_file_size big) -eq $(cat big.size)
'

test_done
4 changes: 4 additions & 0 deletions t/test-lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1642,6 +1642,10 @@ build_option () {
sed -ne "s/^$1: //p"
}

# Satisfied only when build_option reports sizeof-size_t as exactly 8.
test_lazy_prereq SIZE_T_IS_64BIT '
	test "$(build_option sizeof-size_t)" -eq 8
'

# Satisfied when build_option reports sizeof-long as 8 or more.
test_lazy_prereq LONG_IS_64BIT '
	test "$(build_option sizeof-long)" -ge 8
'
Expand Down