Skip to content

Commit 950f6d1

Browse files
Be more efficient about globbing
Before, we were walking the directory tree recursively unconditionally. Now we check if the pattern is recursive before doing that, and also only check the directory's full listing if a glob pattern is involved at all, so literal patterns are more efficient.
1 parent 5c9cade commit 950f6d1

File tree

3 files changed

+73
-60
lines changed

3 files changed

+73
-60
lines changed

Cabal/Distribution/Simple/Glob.hs

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@
1616
module Distribution.Simple.Glob (
1717
matchFileGlob,
1818
matchDirFileGlob,
19+
matchDirFileGlob',
1920
fileGlobMatches,
2021
parseFileGlob,
2122
explainGlobSyntaxError,
2223
GlobSyntaxError(..),
23-
GlobPat,
24+
Glob,
2425
) where
2526

2627
import Prelude ()
@@ -30,6 +31,7 @@ import Distribution.Simple.Utils
3031
import Distribution.Verbosity
3132
import Distribution.Version
3233

34+
import System.Directory (getDirectoryContents, doesFileExist)
3335
import System.FilePath (joinPath, splitExtensions, splitDirectories, takeExtensions, (</>))
3436

3537
-- Note throughout that we use splitDirectories, not splitPath. On
@@ -84,30 +86,35 @@ explainGlobSyntaxError filepath VersionDoesNotSupportGlob =
8486

8587
data IsRecursive = Recursive | NonRecursive
8688

87-
data GlobPat = PatStem String GlobPat
88-
-- ^ A single subdirectory component + remainder.
89-
| PatMatch IsRecursive String
90-
-- ^ First argument: Is this a @**/*.ext@ pattern?
91-
-- Second argument: the extensions to accept.
92-
| PatLit FilePath
93-
-- ^ Literal file name.
89+
data Glob
90+
= GlobStem String Glob
91+
-- ^ A single subdirectory component + remainder.
92+
| GlobFinal GlobFinal
9493

95-
fileGlobMatches :: GlobPat -> FilePath -> Bool
94+
data GlobFinal
95+
= FinalMatch IsRecursive String
96+
-- ^ First argument: Is this a @**/*.ext@ pattern?
97+
-- Second argument: the extensions to accept.
98+
| FinalLit FilePath
99+
-- ^ Literal file name.
100+
101+
fileGlobMatches :: Glob -> FilePath -> Bool
96102
fileGlobMatches pat = fileGlobMatchesSegments pat . splitDirectories
97103

98-
fileGlobMatchesSegments :: GlobPat -> [FilePath] -> Bool
104+
fileGlobMatchesSegments :: Glob -> [FilePath] -> Bool
99105
fileGlobMatchesSegments _ [] = False
100106
fileGlobMatchesSegments pat (seg : segs) = case pat of
101-
PatStem dir pat' ->
107+
GlobStem dir pat' ->
102108
dir == seg && fileGlobMatchesSegments pat' segs
103-
PatMatch Recursive ext ->
104-
ext == takeExtensions (last $ seg:segs)
105-
PatMatch NonRecursive ext ->
106-
null segs && ext == takeExtensions seg
107-
PatLit filename ->
108-
null segs && filename == seg
109-
110-
parseFileGlob :: Version -> FilePath -> Either GlobSyntaxError GlobPat
109+
GlobFinal final -> case final of
110+
FinalMatch Recursive ext ->
111+
ext == takeExtensions (last $ seg:segs)
112+
FinalMatch NonRecursive ext ->
113+
null segs && ext == takeExtensions seg
114+
FinalLit filename ->
115+
null segs && filename == seg
116+
117+
parseFileGlob :: Version -> FilePath -> Either GlobSyntaxError Glob
111118
parseFileGlob version filepath = case reverse (splitDirectories filepath) of
112119
[] ->
113120
Left EmptyGlob
@@ -118,31 +125,43 @@ parseFileGlob version filepath = case reverse (splitDirectories filepath) of
118125
| null ext -> Left NoExtensionOnStar
119126
| otherwise -> Right ext
120127
_ -> Left LiteralFileNameGlobStar
121-
foldM addStem (PatMatch Recursive ext) segments
128+
foldM addStem (GlobFinal $ FinalMatch Recursive ext) segments
122129
| otherwise -> Left VersionDoesNotSupportGlobStar
123130
(filename : segments) -> do
124131
pat <- case splitExtensions filename of
125132
("*", ext) | not allowGlob -> Left VersionDoesNotSupportGlob
126133
| '*' `elem` ext -> Left StarInExtension
127134
| null ext -> Left NoExtensionOnStar
128-
| otherwise -> Right (PatMatch NonRecursive ext)
135+
| otherwise -> Right (FinalMatch NonRecursive ext)
129136
(_, ext) | '*' `elem` ext -> Left StarInExtension
130137
| '*' `elem` filename -> Left StarInFileName
131-
| otherwise -> Right (PatLit filename)
132-
foldM addStem pat segments
138+
| otherwise -> Right (FinalLit filename)
139+
foldM addStem (GlobFinal pat) segments
133140
where
134141
allowGlob = version >= mkVersion [1,6]
135142
allowGlobStar = version >= mkVersion [3,0]
136143
addStem pat seg
137144
| '*' `elem` seg = Left StarInDirectory
138-
| otherwise = Right (PatStem seg pat)
145+
| otherwise = Right (GlobStem seg pat)
139146

140147
matchFileGlob :: Verbosity -> Version -> FilePath -> IO [FilePath]
141148
matchFileGlob verbosity version = matchDirFileGlob verbosity version "."
142149

143-
-- The returned values do not include the supplied @dir@ prefix.
150+
-- | Like 'matchDirFileGlob'', but will 'die'' when the glob matches
151+
-- no files.
144152
matchDirFileGlob :: Verbosity -> Version -> FilePath -> FilePath -> IO [FilePath]
145-
matchDirFileGlob verbosity version rawDir filepath = case parseFileGlob version filepath of
153+
matchDirFileGlob verbosity version dir filepath = do
154+
matches <- matchDirFileGlob' verbosity version dir filepath
155+
when (null matches) $ die' verbosity $
156+
"filepath wildcard '" ++ filepath
157+
++ "' does not match any files."
158+
return matches
159+
160+
-- | Match files against a glob, starting in a directory.
161+
--
162+
-- The returned values do not include the supplied @dir@ prefix.
163+
matchDirFileGlob' :: Verbosity -> Version -> FilePath -> FilePath -> IO [FilePath]
164+
matchDirFileGlob' verbosity version rawDir filepath = case parseFileGlob version filepath of
146165
Left err -> die' verbosity $ explainGlobSyntaxError filepath err
147166
Right pat -> do
148167
-- The default data-dir is null. Our callers -should- be
@@ -159,17 +178,22 @@ matchDirFileGlob verbosity version rawDir filepath = case parseFileGlob version
159178
-- ".". Walking the tree starting there involves going into .git/
160179
-- and dist-newstyle/, which is a lot of work for no reward, so
161180
-- extract the constant prefix from the pattern and start walking
162-
-- there. If the pattern is **/*.blah, then of course we'll have
163-
-- to walk the whole thing anyway, but that's what the user asked
164-
-- for!
165-
let (prefixSegments, pat') = splitConstantPrefix pat
181+
-- there, and only walk as much as we need to: recursively if **,
182+
-- the whole directory if *, and just the specific file if it's a
183+
-- literal.
184+
let (prefixSegments, final) = splitConstantPrefix pat
166185
joinedPrefix = joinPath prefixSegments
167-
files <- getDirectoryContentsRecursive (dir </> joinedPrefix)
168-
case filter (fileGlobMatches pat') files of
169-
[] -> die' verbosity $
170-
"filepath wildcard '" ++ filepath
171-
++ "' does not match any files."
172-
matches -> return $ fmap (joinedPrefix </>) matches
186+
files <- case final of
187+
FinalMatch recursive exts -> do
188+
let prefix = dir </> joinedPrefix
189+
candidates <- case recursive of
190+
Recursive -> getDirectoryContentsRecursive prefix
191+
NonRecursive -> filterM (doesFileExist . (prefix </>)) =<< getDirectoryContents prefix
192+
return $ filter ((==) exts . takeExtensions) candidates
193+
FinalLit fn -> do
194+
exists <- doesFileExist (dir </> joinedPrefix </> fn)
195+
return [ fn | exists ]
196+
return $ fmap (joinedPrefix </>) files
173197

174198
unfoldr' :: (a -> Either r (b, a)) -> a -> ([b], r)
175199
unfoldr' f a = case f a of
@@ -178,10 +202,10 @@ unfoldr' f a = case f a of
178202
(bs, r) -> (b : bs, r)
179203

180204
-- | Extract the (possibly null) constant prefix from the pattern.
181-
-- This has the property that, if @(pref, pat') = splitConstantPrefix pat@,
182-
-- then @pat === foldr PatStem pat' pref@.
183-
splitConstantPrefix :: GlobPat -> ([FilePath], GlobPat)
205+
-- This has the property that, if @(pref, final) = splitConstantPrefix pat@,
206+
-- then @pat === foldr GlobStem (GlobFinal final) pref@.
207+
splitConstantPrefix :: Glob -> ([FilePath], GlobFinal)
184208
splitConstantPrefix = unfoldr' step
185209
where
186-
step (PatStem seg pat) = Right (seg, pat)
187-
step pat = Left pat
210+
step (GlobStem seg pat) = Right (seg, pat)
211+
step (GlobFinal pat) = Left pat

Cabal/doc/developing-packages.rst

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,11 +1009,12 @@ describe the package as a whole:
10091009
of the same file type without making it too easy to accidentally
10101010
include unwanted files.
10111011

1012-
On efficiency: the directory tree will be walked starting with the
1013-
parent directory of the first wildcard. If that's the root of the
1014-
project, this might include ``.git/``, ``dist-newstyle/``, or
1015-
other large directories! To avoid this behaviour, put the files
1016-
that wildcards will match against in their own folder.
1012+
On efficiency: if you use ``**`` patterns, the directory tree will
1013+
be walked starting with the parent directory of the ``**``. If
1014+
that's the root of the project, this might include ``.git/``,
1015+
``dist-newstyle/``, or other large directories! To avoid this
1016+
behaviour, put the files that wildcards will match against in
1017+
their own folder.
10171018

10181019
``**`` wildcards are available starting in Cabal 3.0.
10191020

Cabal/tests/UnitTests/Distribution/Simple/Glob.hs

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ compatibilityTests version =
5151
[ testCase "literal match" $
5252
testMatches "foo/a" ["foo/a"]
5353
, testCase "literal no match on prefix" $
54-
testNoMatches "foo/c.html"
54+
testMatches "foo/c.html" []
5555
, testCase "literal no match on suffix" $
5656
testMatches "foo/a.html" ["foo/a.html"]
5757
, testCase "literal no prefix" $
@@ -81,7 +81,6 @@ compatibilityTests version =
8181
]
8282
where
8383
testMatches = testMatchesVersion version
84-
testNoMatches = testNoMatchesVersion version
8584
testFailParse = testFailParseVersion version
8685

8786
-- For efficiency reasons, matchDirFileGlob isn't a simple call to
@@ -103,23 +102,12 @@ testMatchesVersion version pat expected = do
103102
-- ...and the impure glob matcher.
104103
withSystemTempDirectory "globstar-sample" $ \tmpdir -> do
105104
makeSampleFiles tmpdir
106-
actual <- matchDirFileGlob Verbosity.normal version tmpdir pat
105+
actual <- matchDirFileGlob' Verbosity.normal version tmpdir pat
107106
unless (isEqual actual expected) $
108107
assertFailure $ "Unexpected result (impure matcher): " ++ show actual
109108
where
110109
isEqual = (==) `on` (sort . fmap normalise)
111110

112-
-- TODO: Unify this and testMatchesVersion. Can't do this yet because
113-
-- matchDirFileGlob calls die' when it doesn't match anything.
114-
testNoMatchesVersion :: Version -> FilePath -> Assertion
115-
testNoMatchesVersion version pat =
116-
case parseFileGlob version pat of
117-
Left _ -> assertFailure "Couldn't compile the pattern."
118-
Right globPat ->
119-
let actual = filter (fileGlobMatches globPat) sampleFileNames
120-
in unless (null actual) $
121-
assertFailure $ "Unexpected result (pure matcher): " ++ show actual
122-
123111
testFailParseVersion :: Version -> FilePath -> GlobSyntaxError -> Assertion
124112
testFailParseVersion version pat expected =
125113
case parseFileGlob version pat of

0 commit comments

Comments
 (0)