Skip to content
Closed
2 changes: 1 addition & 1 deletion Documentation/technical/commit-graph-format.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ the body into "chunks" and provide a binary lookup table at the beginning
of the body. The header includes certain values, such as number of chunks
and hash type.

All 4-byte numbers are in network order.
All multi-byte numbers are in network byte order.

HEADER:

Expand Down
2 changes: 1 addition & 1 deletion commit-graph.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd,
last_chunk_id = 0;
last_chunk_offset = 8;
chunk_lookup = data + 8;
for (i = 0; i < graph->num_chunks; i++) {
for (i = 0; i <= graph->num_chunks; i++) {
uint32_t chunk_id;
uint64_t chunk_offset;
int chunk_repeated = 0;
Expand Down
1 change: 1 addition & 0 deletions commit-slab-decl.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct slabname { \
void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the Git mailing list, René Scharfe wrote (reply to this):

Am 05.06.20 um 15:00 schrieb SZEDER Gábor via GitGitGadget:
> From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= <szeder.dev@gmail.com>
>
> clear_##slabname() frees only the memory allocated for a commit slab
> itself, but entries in the commit slab might own additional memory
> outside the slab that should be freed as well.  We already have (at
> least) one such commit slab, and this patch series is about to add one
> more.
>
> To free all additional memory owned by entries on the commit slab the
> user of such a slab could iterate over all commits it knows about,
> peek whether there is a valid entry associated with each commit, and
> free the additional memory, if any.  Or it could rely on intimate
> knowledge about the internals of the commit slab implementation, and
> could itself iterate directly through all entries in the slab, and
> free the additional memory.  Or it could just leak the additional
> memory...
>
> Introduce deep_clear_##slabname() to allow releasing memory owned by
> commit slab entries by invoking the 'void free_fn(elemtype *ptr)'
> function specified as parameter for each entry in the slab.

Adding a new function instead of extending the existing ones makes
sense, as this is a rare requirement.

>
> Use it in get_shallow_commits() in 'shallow.c' to replace an
> open-coded iteration over a commit slab's entries.
>
> Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  commit-slab-decl.h |  1 +
>  commit-slab-impl.h | 13 +++++++++++++
>  commit-slab.h      | 10 ++++++++++
>  shallow.c          | 14 +++++---------
>  4 files changed, 29 insertions(+), 9 deletions(-)
>
> diff --git a/commit-slab-decl.h b/commit-slab-decl.h
> index adc7b46c83b..286164b7e27 100644
> --- a/commit-slab-decl.h
> +++ b/commit-slab-decl.h
> @@ -32,6 +32,7 @@ struct slabname {							\
>  void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
>  void init_ ##slabname(struct slabname *s);				\
>  void clear_ ##slabname(struct slabname *s);				\
> +void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \
>  elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \
>  elemtype *slabname## _at(struct slabname *s, const struct commit *c);	\
>  elemtype *slabname## _peek(struct slabname *s, const struct commit *c)
> diff --git a/commit-slab-impl.h b/commit-slab-impl.h
> index 5c0eb91a5d1..557738df271 100644
> --- a/commit-slab-impl.h
> +++ b/commit-slab-impl.h
> @@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s)			\
>  	FREE_AND_NULL(s->slab);						\
>  }									\
>  									\
> +scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \
> +{									\
> +	unsigned int i;							\
> +	for (i = 0; i < s->slab_count; i++) {				\
> +		unsigned int j;						\
> +		if (!s->slab[i])					\
> +			continue;					\
> +		for (j = 0; j < s->slab_size; j++)			\
> +			free_fn(&s->slab[i][j * s->stride]);		\
> +	}								\
> +	clear_ ##slabname(s);						> +}									\


Why pass an elemtype pointer to the callback function instead of
a plain elemtype?  Because it matches the return type of _at() and
_peek().  Consistency, good.  Handing it a pointer allows the
callback to pass it on to free(), though, which would be bad,
since we do that in clear_() as well.  Hmm.

> +									\
>  scope elemtype *slabname## _at_peek(struct slabname *s,			\
>  						  const struct commit *c, \
>  						  int add_if_missing)   \
> diff --git a/commit-slab.h b/commit-slab.h
> index 05b3f2804e7..8e72a305365 100644
> --- a/commit-slab.h
> +++ b/commit-slab.h
> @@ -47,6 +47,16 @@
>   *
>   *   Call this function before the slab falls out of scope to avoid
>   *   leaking memory.
> + *
> + * - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*))
> + *
> + *   Empties the slab, similar to clear_indegree(), but in addition it
> + *   calls the given 'free_fn' for each slab entry to release any
> + *   additional memory that might be owned by the entry (but not the
> + *   entry itself!).
> + *   Note that 'free_fn' might be called even for entries for which no
> + *   indegree_at() call has been made; in this case 'free_fn' is invoked
> + *   with a pointer to a zero-initialized location.
>   */
>
>  #define define_commit_slab(slabname, elemtype) \
> diff --git a/shallow.c b/shallow.c
> index 7fd04afed19..c4ac8a73273 100644
> --- a/shallow.c
> +++ b/shallow.c
> @@ -84,6 +84,10 @@ int is_repository_shallow(struct repository *r)
>   * supports a "valid" flag.
>   */
>  define_commit_slab(commit_depth, int *);
> +static void free_depth_in_slab(int **ptr)
> +{
> +	FREE_AND_NULL(*ptr);
> +}

Why FREE_AND_NULL?  The original loop below called free().  The slabs
are all released by deep_clear_() immediately after the callbacks are
done anyway, so what's the point in zeroing these pointers?

>  struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
>  		int shallow_flag, int not_shallow_flag)
>  {
> @@ -150,15 +154,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
>  			}
>  		}
>  	}
> -	for (i = 0; i < depths.slab_count; i++) {
> -		int j;
> -
> -		if (!depths.slab[i])
> -			continue;
> -		for (j = 0; j < depths.slab_size; j++)
> -			free(depths.slab[i][j]);
> -	}
> -	clear_commit_depth(&depths);
> +	deep_clear_commit_depth(&depths, free_depth_in_slab);

What a relief!

>
>  	return result;
>  }
>

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the Git mailing list, Derrick Stolee wrote (reply to this):

On 6/18/2020 4:59 PM, René Scharfe wrote:
> Am 05.06.20 um 15:00 schrieb SZEDER Gábor via GitGitGadget:
>> From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= <szeder.dev@gmail.com>
>>
>> clear_##slabname() frees only the memory allocated for a commit slab
>> itself, but entries in the commit slab might own additional memory
>> outside the slab that should be freed as well.  We already have (at
>> least) one such commit slab, and this patch series is about to add one
>> more.
>>
>> To free all additional memory owned by entries on the commit slab the
>> user of such a slab could iterate over all commits it knows about,
>> peek whether there is a valid entry associated with each commit, and
>> free the additional memory, if any.  Or it could rely on intimate
>> knowledge about the internals of the commit slab implementation, and
>> could itself iterate directly through all entries in the slab, and
>> free the additional memory.  Or it could just leak the additional
>> memory...
>>
>> Introduce deep_clear_##slabname() to allow releasing memory owned by
>> commit slab entries by invoking the 'void free_fn(elemtype *ptr)'
>> function specified as parameter for each entry in the slab.
> 
> Adding a new function instead of extending the existing ones makes
> sense, as this is a rare requirement.
> 
>>
>> Use it in get_shallow_commits() in 'shallow.c' to replace an
>> open-coded iteration over a commit slab's entries.
>>
>> Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
>> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
>> ---
>>  commit-slab-decl.h |  1 +
>>  commit-slab-impl.h | 13 +++++++++++++
>>  commit-slab.h      | 10 ++++++++++
>>  shallow.c          | 14 +++++---------
>>  4 files changed, 29 insertions(+), 9 deletions(-)
>>
>> diff --git a/commit-slab-decl.h b/commit-slab-decl.h
>> index adc7b46c83b..286164b7e27 100644
>> --- a/commit-slab-decl.h
>> +++ b/commit-slab-decl.h
>> @@ -32,6 +32,7 @@ struct slabname {							\
>>  void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
>>  void init_ ##slabname(struct slabname *s);				\
>>  void clear_ ##slabname(struct slabname *s);				\
>> +void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \
>>  elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \
>>  elemtype *slabname## _at(struct slabname *s, const struct commit *c);	\
>>  elemtype *slabname## _peek(struct slabname *s, const struct commit *c)
>> diff --git a/commit-slab-impl.h b/commit-slab-impl.h
>> index 5c0eb91a5d1..557738df271 100644
>> --- a/commit-slab-impl.h
>> +++ b/commit-slab-impl.h
>> @@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s)			\
>>  	FREE_AND_NULL(s->slab);						\
>>  }									\
>>  									\
>> +scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \
>> +{									\
>> +	unsigned int i;							\
>> +	for (i = 0; i < s->slab_count; i++) {				\
>> +		unsigned int j;						\
>> +		if (!s->slab[i])					\
>> +			continue;					\
>> +		for (j = 0; j < s->slab_size; j++)			\
>> +			free_fn(&s->slab[i][j * s->stride]);		\
>> +	}								\
>> +	clear_ ##slabname(s);						> +}									\
> 
> 
> Why pass an elemtype pointer to the callback function instead of
> a plain elemtype?  Because it matches the return type of _at() and
> _peek().  Consistency, good.  Handing it a pointer allows the
> callback to pass it on to free(), though, which would be bad,
> since we do that in clear_() as well.  Hmm.
> 
>> +									\
>>  scope elemtype *slabname## _at_peek(struct slabname *s,			\
>>  						  const struct commit *c, \
>>  						  int add_if_missing)   \
>> diff --git a/commit-slab.h b/commit-slab.h
>> index 05b3f2804e7..8e72a305365 100644
>> --- a/commit-slab.h
>> +++ b/commit-slab.h
>> @@ -47,6 +47,16 @@
>>   *
>>   *   Call this function before the slab falls out of scope to avoid
>>   *   leaking memory.
>> + *
>> + * - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*))
>> + *
>> + *   Empties the slab, similar to clear_indegree(), but in addition it
>> + *   calls the given 'free_fn' for each slab entry to release any
>> + *   additional memory that might be owned by the entry (but not the
>> + *   entry itself!).
>> + *   Note that 'free_fn' might be called even for entries for which no
>> + *   indegree_at() call has been made; in this case 'free_fn' is invoked
>> + *   with a pointer to a zero-initialized location.
>>   */
>>
>>  #define define_commit_slab(slabname, elemtype) \
>> diff --git a/shallow.c b/shallow.c
>> index 7fd04afed19..c4ac8a73273 100644
>> --- a/shallow.c
>> +++ b/shallow.c
>> @@ -84,6 +84,10 @@ int is_repository_shallow(struct repository *r)
>>   * supports a "valid" flag.
>>   */
>>  define_commit_slab(commit_depth, int *);
>> +static void free_depth_in_slab(int **ptr)
>> +{
>> +	FREE_AND_NULL(*ptr);
>> +}
> 
> Why FREE_AND_NULL?  The original loop below called free().  The slabs
> are all released by deep_clear_() immediately after the callbacks are
> done anyway, so what's the point in zeroing these pointers?

I think the point was that a later change was going to free
elements in the slab on a one-by-one basis while computing
the filters, to save memory overall. To be future-proof
against such a change, we need to NULL the pointers here.

Perhaps that viewpoint also answers your other comment about
"why pass the pointer?"

Thanks,
-Stolee

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the Git mailing list, SZEDER Gábor wrote (reply to this):

On Fri, Jun 05, 2020 at 01:00:26PM +0000, SZEDER Gábor via GitGitGadget wrote:
> From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= <szeder.dev@gmail.com>
> 
> clear_##slabname() frees only the memory allocated for a commit slab
> itself, but entries in the commit slab might own additional memory
> outside the slab that should be freed as well.  We already have (at
> least) one such commit slab, and this patch series is about to add one
> more.

This was only true in my original submission, but not anymore: this
patch series doesn't add another such slab, and, more importantly, now
we have at least two such commit slabs.  deep_clear_##slabnmae() could
be used to clear the bloom_filter_slab and all memory attached to it,
which at the moment is just leaked.

> To free all additional memory owned by entries on the commit slab the
> user of such a slab could iterate over all commits it knows about,
> peek whether there is a valid entry associated with each commit, and
> free the additional memory, if any.  Or it could rely on intimate
> knowledge about the internals of the commit slab implementation, and
> could itself iterate directly through all entries in the slab, and
> free the additional memory.  Or it could just leak the additional
> memory...
> 
> Introduce deep_clear_##slabname() to allow releasing memory owned by
> commit slab entries by invoking the 'void free_fn(elemtype *ptr)'
> function specified as parameter for each entry in the slab.
> 
> Use it in get_shallow_commits() in 'shallow.c' to replace an
> open-coded iteration over a commit slab's entries.
> 
> Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  commit-slab-decl.h |  1 +
>  commit-slab-impl.h | 13 +++++++++++++
>  commit-slab.h      | 10 ++++++++++
>  shallow.c          | 14 +++++---------
>  4 files changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/commit-slab-decl.h b/commit-slab-decl.h
> index adc7b46c83b..286164b7e27 100644
> --- a/commit-slab-decl.h
> +++ b/commit-slab-decl.h
> @@ -32,6 +32,7 @@ struct slabname {							\
>  void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
>  void init_ ##slabname(struct slabname *s);				\
>  void clear_ ##slabname(struct slabname *s);				\
> +void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \
>  elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \
>  elemtype *slabname## _at(struct slabname *s, const struct commit *c);	\
>  elemtype *slabname## _peek(struct slabname *s, const struct commit *c)
> diff --git a/commit-slab-impl.h b/commit-slab-impl.h
> index 5c0eb91a5d1..557738df271 100644
> --- a/commit-slab-impl.h
> +++ b/commit-slab-impl.h
> @@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s)			\
>  	FREE_AND_NULL(s->slab);						\
>  }									\
>  									\
> +scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \
> +{									\
> +	unsigned int i;							\
> +	for (i = 0; i < s->slab_count; i++) {				\
> +		unsigned int j;						\
> +		if (!s->slab[i])					\
> +			continue;					\
> +		for (j = 0; j < s->slab_size; j++)			\
> +			free_fn(&s->slab[i][j * s->stride]);		\
> +	}								\
> +	clear_ ##slabname(s);						\
> +}									\
> +									\
>  scope elemtype *slabname## _at_peek(struct slabname *s,			\
>  						  const struct commit *c, \
>  						  int add_if_missing)   \
> diff --git a/commit-slab.h b/commit-slab.h
> index 05b3f2804e7..8e72a305365 100644
> --- a/commit-slab.h
> +++ b/commit-slab.h
> @@ -47,6 +47,16 @@
>   *
>   *   Call this function before the slab falls out of scope to avoid
>   *   leaking memory.
> + *
> + * - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*))
> + *
> + *   Empties the slab, similar to clear_indegree(), but in addition it
> + *   calls the given 'free_fn' for each slab entry to release any
> + *   additional memory that might be owned by the entry (but not the
> + *   entry itself!).
> + *   Note that 'free_fn' might be called even for entries for which no
> + *   indegree_at() call has been made; in this case 'free_fn' is invoked
> + *   with a pointer to a zero-initialized location.
>   */
>  
>  #define define_commit_slab(slabname, elemtype) \
> diff --git a/shallow.c b/shallow.c
> index 7fd04afed19..c4ac8a73273 100644
> --- a/shallow.c
> +++ b/shallow.c
> @@ -84,6 +84,10 @@ int is_repository_shallow(struct repository *r)
>   * supports a "valid" flag.
>   */
>  define_commit_slab(commit_depth, int *);
> +static void free_depth_in_slab(int **ptr)
> +{
> +	FREE_AND_NULL(*ptr);
> +}
>  struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
>  		int shallow_flag, int not_shallow_flag)
>  {
> @@ -150,15 +154,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
>  			}
>  		}
>  	}
> -	for (i = 0; i < depths.slab_count; i++) {
> -		int j;
> -
> -		if (!depths.slab[i])
> -			continue;
> -		for (j = 0; j < depths.slab_size; j++)
> -			free(depths.slab[i][j]);
> -	}
> -	clear_commit_depth(&depths);
> +	deep_clear_commit_depth(&depths, free_depth_in_slab);
>  
>  	return result;
>  }
> -- 
> gitgitgadget
> 

void init_ ##slabname(struct slabname *s); \
void clear_ ##slabname(struct slabname *s); \
void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \
elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \
elemtype *slabname## _at(struct slabname *s, const struct commit *c); \
elemtype *slabname## _peek(struct slabname *s, const struct commit *c)
Expand Down
13 changes: 13 additions & 0 deletions commit-slab-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s) \
FREE_AND_NULL(s->slab); \
} \
\
scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \
{ \
unsigned int i; \
for (i = 0; i < s->slab_count; i++) { \
unsigned int j; \
if (!s->slab[i]) \
continue; \
for (j = 0; j < s->slab_size; j++) \
free_fn(&s->slab[i][j * s->stride]); \
} \
clear_ ##slabname(s); \
} \
\
scope elemtype *slabname## _at_peek(struct slabname *s, \
const struct commit *c, \
int add_if_missing) \
Expand Down
10 changes: 10 additions & 0 deletions commit-slab.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,16 @@
*
* Call this function before the slab falls out of scope to avoid
* leaking memory.
*
* - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*))
*
* Empties the slab, similar to clear_indegree(), but in addition it
* calls the given 'free_fn' for each slab entry to release any
* additional memory that might be owned by the entry (but not the
* entry itself!).
* Note that 'free_fn' might be called even for entries for which no
* indegree_at() call has been made; in this case 'free_fn' is invoked
* with a pointer to a zero-initialized location.
*/

#define define_commit_slab(slabname, elemtype) \
Expand Down
14 changes: 5 additions & 9 deletions shallow.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ int is_repository_shallow(struct repository *r)
* supports a "valid" flag.
*/
define_commit_slab(commit_depth, int *);
static void free_depth_in_slab(int **ptr)
{
FREE_AND_NULL(*ptr);
}
struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
int shallow_flag, int not_shallow_flag)
{
Expand Down Expand Up @@ -150,15 +154,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
}
}
}
for (i = 0; i < depths.slab_count; i++) {
int j;

if (!depths.slab[i])
continue;
for (j = 0; j < depths.slab_size; j++)
free(depths.slab[i][j]);
}
clear_commit_depth(&depths);
deep_clear_commit_depth(&depths, free_depth_in_slab);

return result;
}
Expand Down
4 changes: 3 additions & 1 deletion t/t4010-diff-pathspec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,9 @@ test_expect_success 'setup submodules' '
test_expect_success 'diff-tree ignores trailing slash on submodule path' '
git diff --name-only HEAD^ HEAD submod >expect &&
git diff --name-only HEAD^ HEAD submod/ >actual &&
test_cmp expect actual
test_cmp expect actual &&
git diff --name-only HEAD^ HEAD -- submod/whatever >actual &&
test_must_be_empty actual
'

test_expect_success 'diff multiple wildcard pathspecs' '
Expand Down
2 changes: 1 addition & 1 deletion t/t5318-commit-graph.sh
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ test_expect_success 'detect bad hash version' '
'

test_expect_success 'detect low chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
"missing the .* chunk"
'

Expand Down
9 changes: 8 additions & 1 deletion tree-walk.c
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,14 @@ static int match_entry(const struct pathspec_item *item,
if (matchlen > pathlen) {
if (match[pathlen] != '/')
return 0;
if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode))
/*
* Reject non-directories as partial pathnames, except
* when match is a submodule with a trailing slash and
* nothing else (to handle 'submod/' and 'submod'
* uniformly).
*/
if (!S_ISDIR(entry->mode) &&
(!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1))
return 0;
}

Expand Down