Skip to content

Commit 942a819

Browse files
derrickstolee authored and dscho committed
Merge pull request #315: unpack-trees:virtualfilesystem: Improve efficiency of clear_ce_flags
When the virtualfilesystem is enabled, the previous implementation of clear_ce_flags would iterate over all of the cache entries and query whether each one is in the virtual filesystem to determine whether to clear one of the SKIP_WORKTREE bits. For each cache entry, we would do a hash lookup for each parent directory in the is_included_in_virtualfilesystem function. That approach is slow for a typical Windows OS enlistment with 3 million files where only a small percentage is in the virtual filesystem. Its cost is O(n_index_entries * n_chars_per_path * n_parent_directories_per_path). In this change, we use the same approach as apply_virtualfilesystem, which iterates over the set of entries in the virtual filesystem and searches the cache for the corresponding entries in order to clear their flags. This approach has a cost of O(n_virtual_filesystem_entries * n_chars_per_path * log(n_index_entries)). The apply_virtualfilesystem code was refactored a bit and modified to clear flags for all names that 'alias' a given virtual filesystem name when ignore_case is set. n_virtual_filesystem_entries is typically much smaller than n_index_entries, in which case the new approach is much faster. We wind up building the name hash for the index, but this happens quickly thanks to multi-threading.
2 parents c186248 + b6749e0 commit 942a819

File tree

5 files changed

+132
-65
lines changed

5 files changed

+132
-65
lines changed

name-hash.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,26 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na
747747
return NULL;
748748
}
749749

750+
/*
 * Find another entry in istate->name_hash, after `ce`, whose name matches
 * ce's own name according to same_name() with the given `igncase` setting.
 *
 * Returns the matching entry, or NULL when `igncase` is zero, `ce` is NULL,
 * or no further matching entry is found.
 */
struct cache_entry *index_file_next_match(struct index_state *istate, struct cache_entry *ce, int igncase)
751+
{
752+
struct cache_entry *next;
753+
754+
/* Without case-insensitivity (or without a starting entry) nothing is searched. */
if (!igncase || !ce) {
755+
return NULL;
756+
}
757+
758+
/*
 * Begin at whatever hashmap_get_next_entry() yields for `ce`.
 * NOTE(review): presumably the next entry sharing ce's hash -- confirm
 * against the hashmap API.
 */
next = hashmap_get_next_entry(&istate->name_hash, ce, ent);
759+
if (!next)
760+
return NULL;
761+
762+
/* Walk onward from `next` and return the first entry same_name() accepts. */
hashmap_for_each_entry_from(&istate->name_hash, next, ent) {
763+
if (same_name(next, ce->name, ce_namelen(ce), igncase))
764+
return next;
765+
}
766+
767+
return NULL;
768+
}
769+
750770
void free_name_hash(struct index_state *istate)
751771
{
752772
if (!istate->name_hash_initialized)

name-hash.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ int index_dir_find(struct index_state *istate, const char *name, int namelen,
1212

1313
void adjust_dirname_case(struct index_state *istate, char *name);
1414
struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);
15+
struct cache_entry *index_file_next_match(struct index_state *istate, struct cache_entry *ce, int igncase);
1516

1617
int test_lazy_init_name_hash(struct index_state *istate, int try_threaded);
1718
void add_name_hash(struct index_state *istate, struct cache_entry *ce);

unpack-trees.c

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1722,14 +1722,6 @@ static int clear_ce_flags_1(struct index_state *istate,
17221722
continue;
17231723
}
17241724

1725-
/* if it's not in the virtual file system, exit early */
1726-
if (core_virtualfilesystem) {
1727-
if (is_included_in_virtualfilesystem(ce->name, ce->ce_namelen) > 0)
1728-
ce->ce_flags &= ~clear_mask;
1729-
cache++;
1730-
continue;
1731-
}
1732-
17331725
if (prefix->len && strncmp(ce->name, prefix->buf, prefix->len))
17341726
break;
17351727

@@ -1806,12 +1798,19 @@ static int clear_ce_flags(struct index_state *istate,
18061798
xsnprintf(label, sizeof(label), "clear_ce_flags/0x%08lx_0x%08lx",
18071799
(unsigned long)select_mask, (unsigned long)clear_mask);
18081800
trace2_region_enter("unpack_trees", label, the_repository);
1809-
rval = clear_ce_flags_1(istate,
1810-
istate->cache,
1811-
istate->cache_nr,
1812-
&prefix,
1813-
select_mask, clear_mask,
1814-
pl, 0, 0);
1801+
if (core_virtualfilesystem) {
1802+
rval = clear_ce_flags_virtualfilesystem(istate,
1803+
select_mask,
1804+
clear_mask);
1805+
} else {
1806+
rval = clear_ce_flags_1(istate,
1807+
istate->cache,
1808+
istate->cache_nr,
1809+
&prefix,
1810+
select_mask, clear_mask,
1811+
pl, 0, 0);
1812+
}
1813+
18151814
trace2_region_leave("unpack_trees", label, the_repository);
18161815

18171816
stop_progress(&istate->progress);

virtualfilesystem.c

Lines changed: 91 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -252,93 +252,133 @@ int is_excluded_from_virtualfilesystem(const char *pathname, int pathlen, int dt
252252
return -1;
253253
}
254254

255-
/*
256-
* Update the CE_SKIP_WORKTREE bits based on the virtual file system.
257-
*/
258-
void apply_virtualfilesystem(struct index_state *istate)
255+
/*
 * Counters filled in while walking the virtual file system data:
 *
 *   nr_unknown       - file rows for which no index entry was found
 *   nr_vfs_dirs      - rows ending in '/' (directory wild cards)
 *   nr_vfs_rows      - all NUL-terminated rows seen
 *   nr_bulk_skip     - entries under a directory row that had a clear_mask
 *                      bit set before it was cleared
 *   nr_explicit_skip - entries matched by a file row that had a clear_mask
 *                      bit set before it was cleared
 */
struct apply_virtual_filesystem_stats {
256+
int nr_unknown;
257+
int nr_vfs_dirs;
258+
int nr_vfs_rows;
259+
int nr_bulk_skip;
260+
int nr_explicit_skip;
261+
};
262+
263+
static void clear_ce_flags_virtualfilesystem_1(struct index_state *istate, int select_mask, int clear_mask,
264+
struct apply_virtual_filesystem_stats *stats)
259265
{
260266
char *buf, *entry;
261267
size_t i;
262-
int nr_unknown = 0;
263-
int nr_vfs_dirs = 0;
264-
int nr_vfs_rows = 0;
265-
int nr_bulk_skip = 0;
266-
int nr_explicit_skip = 0;
267-
268-
if (!repo_config_get_virtualfilesystem(istate->repo))
269-
return;
270-
271-
trace2_region_enter("vfs", "apply", the_repository);
272268

273269
if (!virtual_filesystem_data.len)
274270
get_virtual_filesystem_data(istate->repo, &virtual_filesystem_data);
275271

276-
/* set CE_SKIP_WORKTREE bit on all entries */
277-
for (i = 0; i < istate->cache_nr; i++)
278-
istate->cache[i]->ce_flags |= CE_SKIP_WORKTREE;
279-
280-
/* clear CE_SKIP_WORKTREE bit for everything in the virtual file system */
272+
/* clear specified flag bits for everything in the virtual file system */
281273
entry = buf = virtual_filesystem_data.buf;
282274
for (i = 0; i < virtual_filesystem_data.len; i++) {
283275
if (buf[i] == '\0') {
276+
struct cache_entry *ce;
284277
ssize_t pos, len;
285278

286-
nr_vfs_rows++;
279+
stats->nr_vfs_rows++;
287280

288281
len = buf + i - entry;
289282

290283
/* look for a directory wild card (ie "dir1/") */
291284
if (buf[i - 1] == '/') {
292-
nr_vfs_dirs++;
285+
stats->nr_vfs_dirs++;
293286
if (ignore_case)
294287
adjust_dirname_case(istate, entry);
295288
pos = index_name_pos(istate, entry, len);
296289
if (pos < 0) {
297-
pos = -pos - 1;
298-
while ((size_t)pos < istate->cache_nr && !fspathncmp(istate->cache[pos]->name, entry, len)) {
299-
if (istate->cache[pos]->ce_flags & CE_SKIP_WORKTREE)
300-
nr_bulk_skip++;
301-
istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE;
302-
pos++;
290+
for (pos = -pos - 1; (size_t)pos < istate->cache_nr; pos++) {
291+
ce = istate->cache[pos];
292+
if (fspathncmp(ce->name, entry, len))
293+
break;
294+
295+
if (select_mask && !(ce->ce_flags & select_mask))
296+
continue;
297+
298+
if (ce->ce_flags & clear_mask)
299+
stats->nr_bulk_skip++;
300+
ce->ce_flags &= ~clear_mask;
303301
}
304302
}
305303
} else {
306304
if (ignore_case) {
307-
struct cache_entry *ce = index_file_exists(istate, entry, len, ignore_case);
308-
if (ce) {
309-
if (ce->ce_flags & CE_SKIP_WORKTREE)
310-
nr_explicit_skip++;
311-
ce->ce_flags &= ~CE_SKIP_WORKTREE;
312-
}
313-
else {
314-
nr_unknown++;
315-
}
305+
ce = index_file_exists(istate, entry, len, ignore_case);
316306
} else {
317307
int pos = index_name_pos(istate, entry, len);
318-
if (pos >= 0) {
319-
if (istate->cache[pos]->ce_flags & CE_SKIP_WORKTREE)
320-
nr_explicit_skip++;
321-
istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE;
322-
}
323-
else {
324-
nr_unknown++;
325-
}
308+
309+
ce = NULL;
310+
if (pos >= 0)
311+
ce = istate->cache[pos];
312+
}
313+
314+
if (ce) {
315+
do {
316+
if (!select_mask || (ce->ce_flags & select_mask)) {
317+
if (ce->ce_flags & clear_mask)
318+
stats->nr_explicit_skip++;
319+
ce->ce_flags &= ~clear_mask;
320+
}
321+
322+
/*
323+
* There may be aliases with different cases of the same
324+
* name that also need to be modified.
325+
*/
326+
if (ignore_case)
327+
ce = index_file_next_match(istate, ce, ignore_case);
328+
else
329+
break;
330+
331+
} while (ce);
332+
} else {
333+
stats->nr_unknown++;
326334
}
327335
}
328336

329337
entry += len + 1;
330338
}
331339
}
340+
}
341+
342+
/*
343+
* Clear the specified flags for all entries in the virtual file system
344+
* that match the specified select mask. Returns the number of entries
345+
* processed.
346+
*
* A select_mask of zero selects every entry named by the virtual file
* system. The value returned is istate->cache_nr, i.e. the total number
* of index entries, not the number of entries whose flags changed.
*/
347+
int clear_ce_flags_virtualfilesystem(struct index_state *istate, int select_mask, int clear_mask)
348+
{
349+
/* Required by the helper; the collected counters are not reported here. */
struct apply_virtual_filesystem_stats stats = {0};
350+
351+
clear_ce_flags_virtualfilesystem_1(istate, select_mask, clear_mask, &stats);
352+
return istate->cache_nr;
353+
}
354+
355+
/*
356+
* Update the CE_SKIP_WORKTREE bits based on the virtual file system.
357+
*/
358+
void apply_virtualfilesystem(struct index_state *istate)
359+
{
360+
size_t i;
361+
struct apply_virtual_filesystem_stats stats = {0};
362+
363+
if (!repo_config_get_virtualfilesystem(istate->repo))
364+
return;
365+
366+
trace2_region_enter("vfs", "apply", the_repository);
367+
368+
/* set CE_SKIP_WORKTREE bit on all entries */
369+
for (i = 0; i < istate->cache_nr; i++)
370+
istate->cache[i]->ce_flags |= CE_SKIP_WORKTREE;
332371

333-
if (nr_vfs_rows > 0) {
334-
trace2_data_intmax("vfs", the_repository, "apply/tracked", nr_bulk_skip + nr_explicit_skip);
372+
clear_ce_flags_virtualfilesystem_1(istate, 0, CE_SKIP_WORKTREE, &stats);
373+
if (stats.nr_vfs_rows > 0) {
374+
trace2_data_intmax("vfs", the_repository, "apply/tracked", stats.nr_bulk_skip + stats.nr_explicit_skip);
335375

336-
trace2_data_intmax("vfs", the_repository, "apply/vfs_rows", nr_vfs_rows);
337-
trace2_data_intmax("vfs", the_repository, "apply/vfs_dirs", nr_vfs_dirs);
376+
trace2_data_intmax("vfs", the_repository, "apply/vfs_rows", stats.nr_vfs_rows);
377+
trace2_data_intmax("vfs", the_repository, "apply/vfs_dirs", stats.nr_vfs_dirs);
338378

339-
trace2_data_intmax("vfs", the_repository, "apply/nr_unknown", nr_unknown);
340-
trace2_data_intmax("vfs", the_repository, "apply/nr_bulk_skip", nr_bulk_skip);
341-
trace2_data_intmax("vfs", the_repository, "apply/nr_explicit_skip", nr_explicit_skip);
379+
trace2_data_intmax("vfs", the_repository, "apply/nr_unknown", stats.nr_unknown);
380+
trace2_data_intmax("vfs", the_repository, "apply/nr_bulk_skip", stats.nr_bulk_skip);
381+
trace2_data_intmax("vfs", the_repository, "apply/nr_explicit_skip", stats.nr_explicit_skip);
342382
}
343383

344384
trace2_region_leave("vfs", "apply", the_repository);

virtualfilesystem.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@
66
*/
77
void apply_virtualfilesystem(struct index_state *istate);
88

9+
/*
10+
* Clear the specified flags for all entries in the virtual file system
11+
* that match the specified select mask. Returns the number of entries
12+
* processed.
13+
*
* istate:      index whose cache entries are updated
* select_mask: when non-zero, only entries with at least one of these
*              ce_flags bits set are modified; zero selects all entries
* clear_mask:  ce_flags bits to clear on the selected entries
*/
14+
int clear_ce_flags_virtualfilesystem(struct index_state *istate, int select_mask, int clear_mask);
15+
916
/*
1017
* Return 1 if the requested item is found in the virtual file system,
1118
* 0 for not found and -1 for undecided.

0 commit comments

Comments
 (0)