Skip to content

Commit 3a46b8a

Browse files
derrickstolee authored and jeffhostetler committed
Merge pull request #315: unpack-trees:virtualfilesystem: Improve efficiency of clear_ce_flags
When the virtualfilesystem is enabled the previous implementation of clear_ce_flags would iterate all of the cache entries and query whether each one is in the virtual filesystem to determine whether to clear one of the SKIP_WORKTREE bits. For each cache entry, we would do a hash lookup for each parent directory in the is_included_in_virtualfilesystem function. The former approach is slow for a typical Windows OS enlistment with 3 million files where only a small percentage is in the virtual filesystem. The cost is O(n_index_entries * n_chars_per_path * n_parent_directories_per_path). In this change, we use the same approach as apply_virtualfilesystem, which iterates the set of entries in the virtualfilesystem and searches in the cache for the corresponding entries in order to clear their flags. This approach has a cost of O(n_virtual_filesystem_entries * n_chars_per_path * log(n_index_entries)). The apply_virtualfilesystem code was refactored a bit and modified to clear flags for all names that 'alias' a given virtual filesystem name when ignore_case is set. n_virtual_filesystem_entries is typically much less than n_index_entries, in which case the new approach is much faster. We wind up building the name hash for the index, but this occurs quickly thanks to the multi-threading.
2 parents c8b017a + 7430b15 commit 3a46b8a

File tree

5 files changed

+132
-65
lines changed

5 files changed

+132
-65
lines changed

cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,7 @@ int strcmp_offset(const char *s1, const char *s2, size_t *first_change);
767767
int index_dir_exists(struct index_state *istate, const char *name, int namelen);
768768
void adjust_dirname_case(struct index_state *istate, char *name);
769769
struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);
770+
struct cache_entry *index_file_next_match(struct index_state *istate, struct cache_entry *ce, int igncase);
770771

771772
/*
772773
* Searches for an entry defined by name and namelen in the given index.

name-hash.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,26 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na
723723
return NULL;
724724
}
725725

726+
struct cache_entry *index_file_next_match(struct index_state *istate, struct cache_entry *ce, int igncase)
727+
{
728+
struct cache_entry *next;
729+
730+
if (!igncase || !ce) {
731+
return NULL;
732+
}
733+
734+
next = hashmap_get_next_entry(&istate->name_hash, ce, ent);
735+
if (!next)
736+
return NULL;
737+
738+
hashmap_for_each_entry_from(&istate->name_hash, next, ent) {
739+
if (same_name(next, ce->name, ce_namelen(ce), igncase))
740+
return next;
741+
}
742+
743+
return NULL;
744+
}
745+
726746
void free_name_hash(struct index_state *istate)
727747
{
728748
if (!istate->name_hash_initialized)

unpack-trees.c

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,14 +1448,6 @@ static int clear_ce_flags_1(struct index_state *istate,
14481448
continue;
14491449
}
14501450

1451-
/* if it's not in the virtual file system, exit early */
1452-
if (core_virtualfilesystem) {
1453-
if (is_included_in_virtualfilesystem(ce->name, ce->ce_namelen) > 0)
1454-
ce->ce_flags &= ~clear_mask;
1455-
cache++;
1456-
continue;
1457-
}
1458-
14591451
if (prefix->len && strncmp(ce->name, prefix->buf, prefix->len))
14601452
break;
14611453

@@ -1532,12 +1524,19 @@ static int clear_ce_flags(struct index_state *istate,
15321524
xsnprintf(label, sizeof(label), "clear_ce_flags/0x%08lx_0x%08lx",
15331525
(unsigned long)select_mask, (unsigned long)clear_mask);
15341526
trace2_region_enter("unpack_trees", label, the_repository);
1535-
rval = clear_ce_flags_1(istate,
1536-
istate->cache,
1537-
istate->cache_nr,
1538-
&prefix,
1539-
select_mask, clear_mask,
1540-
pl, 0, 0);
1527+
if (core_virtualfilesystem) {
1528+
rval = clear_ce_flags_virtualfilesystem(istate,
1529+
select_mask,
1530+
clear_mask);
1531+
} else {
1532+
rval = clear_ce_flags_1(istate,
1533+
istate->cache,
1534+
istate->cache_nr,
1535+
&prefix,
1536+
select_mask, clear_mask,
1537+
pl, 0, 0);
1538+
}
1539+
15411540
trace2_region_leave("unpack_trees", label, the_repository);
15421541

15431542
stop_progress(&istate->progress);

virtualfilesystem.c

Lines changed: 91 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -247,93 +247,133 @@ int is_excluded_from_virtualfilesystem(const char *pathname, int pathlen, int dt
247247
return -1;
248248
}
249249

250-
/*
251-
* Update the CE_SKIP_WORKTREE bits based on the virtual file system.
252-
*/
253-
void apply_virtualfilesystem(struct index_state *istate)
250+
/*
 * Counters filled in while walking the virtual file system data:
 *
 *   nr_unknown       - file entries for which no index entry was found
 *   nr_vfs_dirs      - entries ending in '/' (directory wild cards)
 *   nr_vfs_rows      - NUL-terminated entries seen in the data
 *   nr_bulk_skip     - index entries matched through a directory entry
 *                      that had a clear_mask bit set before it was cleared
 *   nr_explicit_skip - index entries matched through a file entry that
 *                      had a clear_mask bit set before it was cleared
 */
struct apply_virtual_filesystem_stats {
251+
int nr_unknown;
252+
int nr_vfs_dirs;
253+
int nr_vfs_rows;
254+
int nr_bulk_skip;
255+
int nr_explicit_skip;
256+
};
257+
258+
static void clear_ce_flags_virtualfilesystem_1(struct index_state *istate, int select_mask, int clear_mask,
259+
struct apply_virtual_filesystem_stats *stats)
254260
{
255261
char *buf, *entry;
256262
int i;
257-
int nr_unknown = 0;
258-
int nr_vfs_dirs = 0;
259-
int nr_vfs_rows = 0;
260-
int nr_bulk_skip = 0;
261-
int nr_explicit_skip = 0;
262-
263-
if (!git_config_get_virtualfilesystem())
264-
return;
265-
266-
trace2_region_enter("vfs", "apply", the_repository);
267263

268264
if (!virtual_filesystem_data.len)
269265
get_virtual_filesystem_data(&virtual_filesystem_data);
270266

271-
/* set CE_SKIP_WORKTREE bit on all entries */
272-
for (i = 0; i < istate->cache_nr; i++)
273-
istate->cache[i]->ce_flags |= CE_SKIP_WORKTREE;
274-
275-
/* clear CE_SKIP_WORKTREE bit for everything in the virtual file system */
267+
/* clear specified flag bits for everything in the virtual file system */
276268
entry = buf = virtual_filesystem_data.buf;
277269
for (i = 0; i < virtual_filesystem_data.len; i++) {
278270
if (buf[i] == '\0') {
271+
struct cache_entry *ce;
279272
int pos, len;
280273

281-
nr_vfs_rows++;
274+
stats->nr_vfs_rows++;
282275

283276
len = buf + i - entry;
284277

285278
/* look for a directory wild card (ie "dir1/") */
286279
if (buf[i - 1] == '/') {
287-
nr_vfs_dirs++;
280+
stats->nr_vfs_dirs++;
288281
if (ignore_case)
289282
adjust_dirname_case(istate, entry);
290283
pos = index_name_pos(istate, entry, len);
291284
if (pos < 0) {
292-
pos = -pos - 1;
293-
while (pos < istate->cache_nr && !fspathncmp(istate->cache[pos]->name, entry, len)) {
294-
if (istate->cache[pos]->ce_flags & CE_SKIP_WORKTREE)
295-
nr_bulk_skip++;
296-
istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE;
297-
pos++;
285+
for (pos = -pos - 1; pos < istate->cache_nr; pos++) {
286+
ce = istate->cache[pos];
287+
if (fspathncmp(ce->name, entry, len))
288+
break;
289+
290+
if (select_mask && !(ce->ce_flags & select_mask))
291+
continue;
292+
293+
if (ce->ce_flags & clear_mask)
294+
stats->nr_bulk_skip++;
295+
ce->ce_flags &= ~clear_mask;
298296
}
299297
}
300298
} else {
301299
if (ignore_case) {
302-
struct cache_entry *ce = index_file_exists(istate, entry, len, ignore_case);
303-
if (ce) {
304-
if (ce->ce_flags & CE_SKIP_WORKTREE)
305-
nr_explicit_skip++;
306-
ce->ce_flags &= ~CE_SKIP_WORKTREE;
307-
}
308-
else {
309-
nr_unknown++;
310-
}
300+
ce = index_file_exists(istate, entry, len, ignore_case);
311301
} else {
312302
int pos = index_name_pos(istate, entry, len);
313-
if (pos >= 0) {
314-
if (istate->cache[pos]->ce_flags & CE_SKIP_WORKTREE)
315-
nr_explicit_skip++;
316-
istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE;
317-
}
318-
else {
319-
nr_unknown++;
320-
}
303+
304+
ce = NULL;
305+
if (pos >= 0)
306+
ce = istate->cache[pos];
307+
}
308+
309+
if (ce) {
310+
do {
311+
if (!select_mask || (ce->ce_flags & select_mask)) {
312+
if (ce->ce_flags & clear_mask)
313+
stats->nr_explicit_skip++;
314+
ce->ce_flags &= ~clear_mask;
315+
}
316+
317+
/*
318+
* There may be aliases with different cases of the same
319+
* name that also need to be modified.
320+
*/
321+
if (ignore_case)
322+
ce = index_file_next_match(istate, ce, ignore_case);
323+
else
324+
break;
325+
326+
} while (ce);
327+
} else {
328+
stats->nr_unknown++;
321329
}
322330
}
323331

324332
entry += len + 1;
325333
}
326334
}
335+
}
336+
337+
/*
338+
* Clear the specified flags for all entries in the virtual file system
339+
* that match the specified select mask. Returns the number of entries
340+
* processed.
341+
*/
342+
int clear_ce_flags_virtualfilesystem(struct index_state *istate, int select_mask, int clear_mask)
343+
{
344+
struct apply_virtual_filesystem_stats stats = {0};
345+
346+
clear_ce_flags_virtualfilesystem_1(istate, select_mask, clear_mask, &stats);
347+
return istate->cache_nr;
348+
}
349+
350+
/*
351+
* Update the CE_SKIP_WORKTREE bits based on the virtual file system.
352+
*/
353+
void apply_virtualfilesystem(struct index_state *istate)
354+
{
355+
int i;
356+
struct apply_virtual_filesystem_stats stats = {0};
357+
358+
if (!git_config_get_virtualfilesystem())
359+
return;
360+
361+
trace2_region_enter("vfs", "apply", the_repository);
362+
363+
/* set CE_SKIP_WORKTREE bit on all entries */
364+
for (i = 0; i < istate->cache_nr; i++)
365+
istate->cache[i]->ce_flags |= CE_SKIP_WORKTREE;
327366

328-
if (nr_vfs_rows > 0) {
329-
trace2_data_intmax("vfs", the_repository, "apply/tracked", nr_bulk_skip + nr_explicit_skip);
367+
clear_ce_flags_virtualfilesystem_1(istate, 0, CE_SKIP_WORKTREE, &stats);
368+
if (stats.nr_vfs_rows > 0) {
369+
trace2_data_intmax("vfs", the_repository, "apply/tracked", stats.nr_bulk_skip + stats.nr_explicit_skip);
330370

331-
trace2_data_intmax("vfs", the_repository, "apply/vfs_rows", nr_vfs_rows);
332-
trace2_data_intmax("vfs", the_repository, "apply/vfs_dirs", nr_vfs_dirs);
371+
trace2_data_intmax("vfs", the_repository, "apply/vfs_rows", stats.nr_vfs_rows);
372+
trace2_data_intmax("vfs", the_repository, "apply/vfs_dirs", stats.nr_vfs_dirs);
333373

334-
trace2_data_intmax("vfs", the_repository, "apply/nr_unknown", nr_unknown);
335-
trace2_data_intmax("vfs", the_repository, "apply/nr_bulk_skip", nr_bulk_skip);
336-
trace2_data_intmax("vfs", the_repository, "apply/nr_explicit_skip", nr_explicit_skip);
374+
trace2_data_intmax("vfs", the_repository, "apply/nr_unknown", stats.nr_unknown);
375+
trace2_data_intmax("vfs", the_repository, "apply/nr_bulk_skip", stats.nr_bulk_skip);
376+
trace2_data_intmax("vfs", the_repository, "apply/nr_explicit_skip", stats.nr_explicit_skip);
337377
}
338378

339379
trace2_region_leave("vfs", "apply", the_repository);

virtualfilesystem.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@
66
*/
77
void apply_virtualfilesystem(struct index_state *istate);
88

9+
/*
10+
* Clear the specified flags for all entries in the virtual file system
11+
* that match the specified select mask. Returns the number of entries
12+
* in the index (istate->cache_nr), regardless of how many had flags cleared.
13+
*/
14+
int clear_ce_flags_virtualfilesystem(struct index_state *istate, int select_mask, int clear_mask);
15+
916
/*
1017
* Return 1 if the requested item is found in the virtual file system,
1118
* 0 for not found and -1 for undecided.

0 commit comments

Comments
 (0)