Skip to content

Commit 1207ff4

Browse files
unpack-trees:virtualfilesystem: Improve efficiency of clear_ce_flags
When the virtualfilesystem is enabled, the previous implementation of clear_ce_flags would iterate over all of the cache entries and query whether each one is in the virtual filesystem to determine whether to clear one of the SKIP_WORKTREE bits. For each cache entry, we would do a hash lookup for each parent directory in the is_included_in_virtualfilesystem function.

That approach is slow for a typical Windows OS enlistment with 3 million files, where only a small percentage of the files is in the virtual filesystem. Its cost is O(n_index_entries * n_chars_per_path * n_parent_directories_per_path).

In this change, we use the same approach as apply_virtualfilesystem, which iterates over the set of entries in the virtualfilesystem and searches the cache for the corresponding entries in order to clear their flags. This approach has a cost of O(n_virtual_filesystem_entries * n_chars_per_path * log(n_index_entries)). n_virtual_filesystem_entries is typically much smaller than n_index_entries, in which case the new approach is much faster. We wind up building the name hash for the index, but this happens quickly thanks to multi-threaded hashing.

Signed-off-by: Neeraj Singh <[email protected]>
1 parent 5650dcd commit 1207ff4

File tree

3 files changed

+101
-65
lines changed

3 files changed

+101
-65
lines changed

unpack-trees.c

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1445,14 +1445,6 @@ static int clear_ce_flags_1(struct index_state *istate,
14451445
continue;
14461446
}
14471447

1448-
/* if it's not in the virtual file system, exit early */
1449-
if (core_virtualfilesystem) {
1450-
if (is_included_in_virtualfilesystem(ce->name, ce->ce_namelen) > 0)
1451-
ce->ce_flags &= ~clear_mask;
1452-
cache++;
1453-
continue;
1454-
}
1455-
14561448
if (prefix->len && strncmp(ce->name, prefix->buf, prefix->len))
14571449
break;
14581450

@@ -1529,12 +1521,19 @@ static int clear_ce_flags(struct index_state *istate,
15291521
xsnprintf(label, sizeof(label), "clear_ce_flags/0x%08lx_0x%08lx",
15301522
(unsigned long)select_mask, (unsigned long)clear_mask);
15311523
trace2_region_enter("unpack_trees", label, the_repository);
1532-
rval = clear_ce_flags_1(istate,
1533-
istate->cache,
1534-
istate->cache_nr,
1535-
&prefix,
1536-
select_mask, clear_mask,
1537-
pl, 0, 0);
1524+
if (core_virtualfilesystem) {
1525+
rval = clear_ce_flags_virtualfilesystem(istate,
1526+
select_mask,
1527+
clear_mask);
1528+
} else {
1529+
rval = clear_ce_flags_1(istate,
1530+
istate->cache,
1531+
istate->cache_nr,
1532+
&prefix,
1533+
select_mask, clear_mask,
1534+
pl, 0, 0);
1535+
}
1536+
15381537
trace2_region_leave("unpack_trees", label, the_repository);
15391538

15401539
stop_progress(&istate->progress);

virtualfilesystem.c

Lines changed: 81 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -247,93 +247,123 @@ int is_excluded_from_virtualfilesystem(const char *pathname, int pathlen, int dt
247247
return -1;
248248
}
249249

250-
/*
251-
* Update the CE_SKIP_WORKTREE bits based on the virtual file system.
252-
*/
253-
void apply_virtualfilesystem(struct index_state *istate)
250+
struct apply_virtual_filesystem_stats {
251+
int nr_unknown;
252+
int nr_vfs_dirs;
253+
int nr_vfs_rows;
254+
int nr_bulk_skip;
255+
int nr_explicit_skip;
256+
};
257+
258+
static void clear_ce_flags_virtualfilesystem_1(struct index_state *istate, int select_mask, int clear_mask,
259+
struct apply_virtual_filesystem_stats *stats)
254260
{
255261
char *buf, *entry;
256262
int i;
257-
int nr_unknown = 0;
258-
int nr_vfs_dirs = 0;
259-
int nr_vfs_rows = 0;
260-
int nr_bulk_skip = 0;
261-
int nr_explicit_skip = 0;
262-
263-
if (!git_config_get_virtualfilesystem())
264-
return;
265-
266-
trace2_region_enter("vfs", "apply", the_repository);
267263

268264
if (!virtual_filesystem_data.len)
269265
get_virtual_filesystem_data(&virtual_filesystem_data);
270266

271-
/* set CE_SKIP_WORKTREE bit on all entries */
272-
for (i = 0; i < istate->cache_nr; i++)
273-
istate->cache[i]->ce_flags |= CE_SKIP_WORKTREE;
274-
275-
/* clear CE_SKIP_WORKTREE bit for everything in the virtual file system */
267+
/* clear specified flag bits for everything in the virtual file system */
276268
entry = buf = virtual_filesystem_data.buf;
277269
for (i = 0; i < virtual_filesystem_data.len; i++) {
278270
if (buf[i] == '\0') {
271+
struct cache_entry *ce;
279272
int pos, len;
280273

281-
nr_vfs_rows++;
274+
stats->nr_vfs_rows++;
282275

283276
len = buf + i - entry;
284277

285278
/* look for a directory wild card (ie "dir1/") */
286279
if (buf[i - 1] == '/') {
287-
nr_vfs_dirs++;
280+
stats->nr_vfs_dirs++;
288281
if (ignore_case)
289282
adjust_dirname_case(istate, entry);
290283
pos = index_name_pos(istate, entry, len);
291284
if (pos < 0) {
292-
pos = -pos - 1;
293-
while (pos < istate->cache_nr && !fspathncmp(istate->cache[pos]->name, entry, len)) {
294-
if (istate->cache[pos]->ce_flags & CE_SKIP_WORKTREE)
295-
nr_bulk_skip++;
296-
istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE;
297-
pos++;
285+
for (pos = -pos - 1; pos < istate->cache_nr; pos++) {
286+
ce = istate->cache[pos];
287+
if (fspathncmp(ce->name, entry, len))
288+
break;
289+
290+
if (select_mask && !(ce->ce_flags & select_mask))
291+
continue;
292+
293+
if (ce->ce_flags & clear_mask)
294+
stats->nr_bulk_skip++;
295+
ce->ce_flags &= ~clear_mask;
298296
}
299297
}
300298
} else {
301299
if (ignore_case) {
302-
struct cache_entry *ce = index_file_exists(istate, entry, len, ignore_case);
303-
if (ce) {
304-
if (ce->ce_flags & CE_SKIP_WORKTREE)
305-
nr_explicit_skip++;
306-
ce->ce_flags &= ~CE_SKIP_WORKTREE;
307-
}
308-
else {
309-
nr_unknown++;
310-
}
300+
ce = index_file_exists(istate, entry, len, ignore_case);
311301
} else {
312302
int pos = index_name_pos(istate, entry, len);
313-
if (pos >= 0) {
314-
if (istate->cache[pos]->ce_flags & CE_SKIP_WORKTREE)
315-
nr_explicit_skip++;
316-
istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE;
317-
}
318-
else {
319-
nr_unknown++;
320-
}
303+
304+
ce = NULL;
305+
if (pos >= 0)
306+
ce = istate->cache[pos];
307+
}
308+
309+
if (ce) {
310+
if (select_mask && !(ce->ce_flags & select_mask))
311+
goto skip_entry;
312+
313+
if (ce->ce_flags & clear_mask)
314+
stats->nr_explicit_skip++;
315+
ce->ce_flags &= ~clear_mask;
316+
} else {
317+
stats->nr_unknown++;
321318
}
322319
}
323320

321+
skip_entry:
324322
entry += len + 1;
325323
}
326324
}
325+
}
326+
327+
/*
328+
* Clear the specified flags for all entries in the virtual file system
329+
* that match the specified select mask. Returns the number of entries
330+
* processed.
331+
*/
332+
int clear_ce_flags_virtualfilesystem(struct index_state *istate, int select_mask, int clear_mask)
333+
{
334+
struct apply_virtual_filesystem_stats stats = {0};
335+
336+
clear_ce_flags_virtualfilesystem_1(istate, select_mask, clear_mask, &stats);
337+
return istate->cache_nr;
338+
}
339+
340+
/*
341+
* Update the CE_SKIP_WORKTREE bits based on the virtual file system.
342+
*/
343+
void apply_virtualfilesystem(struct index_state *istate)
344+
{
345+
int i;
346+
struct apply_virtual_filesystem_stats stats = {0};
347+
348+
if (!git_config_get_virtualfilesystem())
349+
return;
350+
351+
trace2_region_enter("vfs", "apply", the_repository);
352+
353+
/* set CE_SKIP_WORKTREE bit on all entries */
354+
for (i = 0; i < istate->cache_nr; i++)
355+
istate->cache[i]->ce_flags |= CE_SKIP_WORKTREE;
327356

328-
if (nr_vfs_rows > 0) {
329-
trace2_data_intmax("vfs", the_repository, "apply/tracked", nr_bulk_skip + nr_explicit_skip);
357+
clear_ce_flags_virtualfilesystem_1(istate, 0, CE_SKIP_WORKTREE, &stats);
358+
if (stats.nr_vfs_rows > 0) {
359+
trace2_data_intmax("vfs", the_repository, "apply/tracked", stats.nr_bulk_skip + stats.nr_explicit_skip);
330360

331-
trace2_data_intmax("vfs", the_repository, "apply/vfs_rows", nr_vfs_rows);
332-
trace2_data_intmax("vfs", the_repository, "apply/vfs_dirs", nr_vfs_dirs);
361+
trace2_data_intmax("vfs", the_repository, "apply/vfs_rows", stats.nr_vfs_rows);
362+
trace2_data_intmax("vfs", the_repository, "apply/vfs_dirs", stats.nr_vfs_dirs);
333363

334-
trace2_data_intmax("vfs", the_repository, "apply/nr_unknown", nr_unknown);
335-
trace2_data_intmax("vfs", the_repository, "apply/nr_bulk_skip", nr_bulk_skip);
336-
trace2_data_intmax("vfs", the_repository, "apply/nr_explicit_skip", nr_explicit_skip);
364+
trace2_data_intmax("vfs", the_repository, "apply/nr_unknown", stats.nr_unknown);
365+
trace2_data_intmax("vfs", the_repository, "apply/nr_bulk_skip", stats.nr_bulk_skip);
366+
trace2_data_intmax("vfs", the_repository, "apply/nr_explicit_skip", stats.nr_explicit_skip);
337367
}
338368

339369
trace2_region_leave("vfs", "apply", the_repository);

virtualfilesystem.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@
66
*/
77
void apply_virtualfilesystem(struct index_state *istate);
88

9+
/*
10+
* Clear the specified flags for all entries in the virtual file system
11+
* that match the specified select mask. Returns the number of entries
12+
* processed.
13+
*/
14+
int clear_ce_flags_virtualfilesystem(struct index_state *istate, int select_mask, int clear_mask);
15+
916
/*
1017
* Return 1 if the requested item is found in the virtual file system,
1118
* 0 for not found and -1 for undecided.

0 commit comments

Comments
 (0)