Skip to content

Commit fd841f6

Browse files
committed
path/filepath: add WalkDir
WalkDir is like Walk but can use ReadDir to read directories, instead of Readdirnames + Lstat on every entry, which is usually a significant performance improvement. (The Lstat can still happen if the walk function calls d.Info.) Fixes #42027. Change-Id: Ie11024b23be2656e320d41fd81ff0d8810aa729e Reviewed-on: https://go-review.googlesource.com/c/go/+/266240 Trust: Russ Cox <[email protected]> Run-TryBot: Russ Cox <[email protected]> TryBot-Result: Go Bot <[email protected]> Reviewed-by: Rob Pike <[email protected]>
1 parent 5ed81a3 commit fd841f6

File tree

3 files changed

+250
-54
lines changed

3 files changed

+250
-54
lines changed

src/path/filepath/export_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@
55
package filepath
66

77
// LstatP exposes the address of the package-level lstat variable so that
// tests can substitute their own implementation.
var LstatP = &lstat
8+
9+
// DirEntryFromInfo is an exported alias of the unexported dirEntryFromInfo
// type, making it reachable from the package's tests.
type DirEntryFromInfo = dirEntryFromInfo

src/path/filepath/path.go

+189-24
Original file line numberDiff line numberDiff line change
@@ -336,25 +336,130 @@ func Rel(basepath, targpath string) (string, error) {
336336
// as an error by any function.
337337
var SkipDir = errors.New("skip this directory")
338338

339-
// WalkFunc is the type of the function called for each file or directory
340-
// visited by Walk. The path argument contains the argument to Walk as a
341-
// prefix; that is, if Walk is called with "dir", which is a directory
342-
// containing the file "a", the walk function will be called with argument
343-
// "dir/a". The info argument is the fs.FileInfo for the named path.
344-
//
345-
// If there was a problem walking to the file or directory named by path, the
346-
// incoming error will describe the problem and the function can decide how
347-
// to handle that error (and Walk will not descend into that directory). In the
348-
// case of an error, the info argument will be nil. If an error is returned,
349-
// processing stops. The sole exception is when the function returns the special
350-
// value SkipDir. If the function returns SkipDir when invoked on a directory,
351-
// Walk skips the directory's contents entirely. If the function returns SkipDir
352-
// when invoked on a non-directory file, Walk skips the remaining files in the
353-
// containing directory.
339+
// WalkDirFunc is the type of the function called by WalkDir to visit
340+
// each file or directory.
341+
//
342+
// The path argument contains the argument to WalkDir as a prefix.
343+
// That is, if WalkDir is called with root argument "dir" and finds a file
344+
// named "a" in that directory, the walk function will be called with
345+
// argument "dir/a".
346+
//
347+
// The directory and file are joined with Join, which may clean the
348+
// directory name: if WalkDir is called with the root argument "x/../dir"
349+
// and finds a file named "a" in that directory, the walk function will
350+
// be called with argument "dir/a", not "x/../dir/a".
351+
//
352+
// The d argument is the fs.DirEntry for the named path.
353+
//
354+
// The error result returned by the function controls how WalkDir
355+
// continues. If the function returns the special value SkipDir, WalkDir
356+
// skips the current directory (path if d.IsDir() is true, otherwise
357+
// path's parent directory). Otherwise, if the function returns a non-nil
358+
// error, WalkDir stops entirely and returns that error.
359+
//
360+
// The err argument reports an error related to path, signaling that
361+
// WalkDir will not walk into that directory. The function can decide how
362+
// to handle that error; as described earlier, returning the error will
363+
// cause WalkDir to stop walking the entire tree.
364+
//
365+
// WalkDir calls the function with a non-nil err argument in two cases.
366+
//
367+
// First, if the initial os.Lstat on the root directory fails, WalkDir
368+
// calls the function with path set to root, d set to nil, and err set to
369+
// the error from os.Lstat.
370+
//
371+
// Second, if a directory's ReadDir method fails, WalkDir calls the
372+
// function with path set to the directory's path, d set to an
373+
// fs.DirEntry describing the directory, and err set to the error from
374+
// ReadDir. In this second case, the function is called twice with the
375+
// path of the directory: the first call is before the directory read is
376+
// attempted and has err set to nil, giving the function a chance to
377+
// return SkipDir and avoid the ReadDir entirely. The second call is
378+
// after a failed ReadDir and reports the error from ReadDir.
379+
// (If ReadDir succeeds, there is no second call.)
380+
//
381+
// The differences between WalkDirFunc and WalkFunc are:
382+
//
383+
// - The second argument has type fs.DirEntry instead of fs.FileInfo.
384+
// - The function is called before reading a directory, to allow SkipDir
385+
// to bypass the directory read entirely.
386+
// - If a directory read fails, the function is called a second time
387+
// for that directory to report the error.
388+
//
389+
type WalkDirFunc func(path string, d fs.DirEntry, err error) error
390+
391+
// WalkFunc is the type of the function called by Walk to visit each
392+
// file or directory.
393+
//
394+
// The path argument contains the argument to Walk as a prefix.
395+
// That is, if Walk is called with root argument "dir" and finds a file
396+
// named "a" in that directory, the walk function will be called with
397+
// argument "dir/a".
398+
//
399+
// The directory and file are joined with Join, which may clean the
400+
// directory name: if Walk is called with the root argument "x/../dir"
401+
// and finds a file named "a" in that directory, the walk function will
402+
// be called with argument "dir/a", not "x/../dir/a".
403+
//
404+
// The info argument is the fs.FileInfo for the named path.
405+
//
406+
// The error result returned by the function controls how Walk continues.
407+
// If the function returns the special value SkipDir, Walk skips the
408+
// current directory (path if info.IsDir() is true, otherwise path's
409+
// parent directory). Otherwise, if the function returns a non-nil error,
410+
// Walk stops entirely and returns that error.
411+
//
412+
// The err argument reports an error related to path, signaling that Walk
413+
// will not walk into that directory. The function can decide how to
414+
// handle that error; as described earlier, returning the error will
415+
// cause Walk to stop walking the entire tree.
416+
//
417+
// Walk calls the function with a non-nil err argument in two cases.
418+
//
419+
// First, if an os.Lstat on the root directory or any directory or file
420+
// in the tree fails, Walk calls the function with path set to that
421+
// directory or file's path, info set to nil, and err set to the error
422+
// from os.Lstat.
423+
//
424+
// Second, if a directory's Readdirnames method fails, Walk calls the
425+
// function with path set to the directory's path, info set to an
426+
// fs.FileInfo describing the directory, and err set to the error from
427+
// Readdirnames.
354428
type WalkFunc func(path string, info fs.FileInfo, err error) error
355429

356430
var lstat = os.Lstat // for testing
357431

432+
// walkDir recursively descends path, calling walkDirFn.
433+
func walkDir(path string, d fs.DirEntry, walkDirFn WalkDirFunc) error {
434+
if err := walkDirFn(path, d, nil); err != nil || !d.IsDir() {
435+
if err == SkipDir && d.IsDir() {
436+
// Successfully skipped directory.
437+
err = nil
438+
}
439+
return err
440+
}
441+
442+
dirs, err := readDir(path)
443+
if err != nil {
444+
// Second call, to report ReadDir error.
445+
err = walkDirFn(path, d, err)
446+
if err != nil {
447+
return err
448+
}
449+
}
450+
451+
for _, d1 := range dirs {
452+
path1 := Join(path, d1.Name())
453+
if err := walkDir(path1, d1, walkDirFn); err != nil {
454+
if err == SkipDir {
455+
break
456+
}
457+
return err
458+
}
459+
}
460+
return nil
461+
}
462+
358463
// walk recursively descends path, calling walkFn.
359464
func walk(path string, info fs.FileInfo, walkFn WalkFunc) error {
360465
if !info.IsDir() {
@@ -393,27 +498,87 @@ func walk(path string, info fs.FileInfo, walkFn WalkFunc) error {
393498
return nil
394499
}
395500

396-
// Walk walks the file tree rooted at root, calling walkFn for each file or
397-
// directory in the tree, including root. All errors that arise visiting files
398-
// and directories are filtered by walkFn. The files are walked in lexical
399-
// order, which makes the output deterministic but means that for very
400-
// large directories Walk can be inefficient.
501+
// WalkDir walks the file tree rooted at root, calling fn for each file or
502+
// directory in the tree, including root.
503+
//
504+
// All errors that arise visiting files and directories are filtered by fn:
505+
// see the WalkDirFunc documentation for details.
506+
//
507+
// The files are walked in lexical order, which makes the output deterministic
508+
// but requires WalkDir to read an entire directory into memory before proceeding
509+
// to walk that directory.
510+
//
511+
// WalkDir does not follow symbolic links.
512+
func WalkDir(root string, fn WalkDirFunc) error {
513+
info, err := os.Lstat(root)
514+
if err != nil {
515+
err = fn(root, nil, err)
516+
} else {
517+
err = walkDir(root, &dirEntryFromInfo{info}, fn)
518+
}
519+
if err == SkipDir {
520+
return nil
521+
}
522+
return err
523+
}
524+
525+
// dirEntryFromInfo wraps an fs.FileInfo and adds the Type and Info methods.
// The remaining methods, such as Name and IsDir, are promoted from the
// embedded FileInfo.
type dirEntryFromInfo struct {
	fs.FileInfo
}

// Type returns the type bits of the wrapped FileInfo's mode.
func (d *dirEntryFromInfo) Type() fs.FileMode {
	mode := d.FileInfo.Mode()
	return mode.Type()
}

// Info returns the wrapped FileInfo. The error is always nil.
func (d *dirEntryFromInfo) Info() (fs.FileInfo, error) {
	info := d.FileInfo
	return info, nil
}
536+
537+
// Walk walks the file tree rooted at root, calling fn for each file or
538+
// directory in the tree, including root.
539+
//
540+
// All errors that arise visiting files and directories are filtered by fn:
541+
// see the WalkFunc documentation for details.
542+
//
543+
// The files are walked in lexical order, which makes the output deterministic
544+
// but requires Walk to read an entire directory into memory before proceeding
545+
// to walk that directory.
546+
//
401547
// Walk does not follow symbolic links.
402-
func Walk(root string, walkFn WalkFunc) error {
548+
//
549+
// Walk is less efficient than WalkDir, introduced in Go 1.16,
550+
// which avoids calling os.Lstat on every visited file or directory.
551+
func Walk(root string, fn WalkFunc) error {
	var walkErr error
	if rootInfo, statErr := os.Lstat(root); statErr == nil {
		walkErr = walk(root, rootInfo, fn)
	} else {
		// The root could not be examined: report that to fn with a nil info.
		walkErr = fn(root, nil, statErr)
	}
	if walkErr != SkipDir {
		return walkErr
	}
	// SkipDir at the top level is not a failure.
	return nil
}
414563

415-
// readDirNames reads the directory named by dirname and returns
564+
// readDir opens the directory named by dirname, reads all of its entries,
// and returns them sorted by name. If the directory cannot be opened or
// read, it returns a nil slice together with the error.
func readDir(dirname string) ([]fs.DirEntry, error) {
	dir, err := os.Open(dirname)
	if err != nil {
		return nil, err
	}
	// The handle is only needed for the read; a Close error is not reported.
	defer dir.Close()

	entries, err := dir.ReadDir(-1)
	if err != nil {
		return nil, err
	}
	byName := func(i, j int) bool { return entries[i].Name() < entries[j].Name() }
	sort.Slice(entries, byName)
	return entries, nil
}
579+
580+
// readDirNames reads the directory named by dirname and returns
581+
// a sorted list of directory entry names.
417582
func readDirNames(dirname string) ([]string, error) {
418583
f, err := os.Open(dirname)
419584
if err != nil {

0 commit comments

Comments
 (0)