Skip to content

Commit f778b44

Browse files
committed
extract escaped glob prefixes
#perf
1 parent b569319 commit f778b44

File tree

2 files changed

+111
-8
lines changed

2 files changed

+111
-8
lines changed

src/lib/Support/Glob.cpp

Lines changed: 89 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,90 @@ match(std::string_view str, char const delimiter) const
467467
}
468468
return MatchType::PARTIAL;
469469
}
470+
471+
/* Tell whether the character at `pos` is preceded by an escaping backslash.

    The run of consecutive backslashes that ends immediately before
    `pos` is measured. Backslashes pair up to form literal backslashes,
    so the character is escaped only when the run has an odd length.

    @param pattern The glob pattern.
    @param pos The index of the character to inspect. The function reads
    `pattern[pos - 1]` and earlier, so `pos` must not exceed
    `pattern.size()`.
    @return true if an odd number of backslashes precedes `pos`,
    false otherwise (including when `pos` is 0).
 */
bool isEscaped(std::string_view pattern, std::size_t pos)
{
    // Move a cursor left over every backslash directly before `pos`.
    std::size_t runBegin = pos;
    while (runBegin != 0 && pattern[runBegin - 1] == '\\')
    {
        --runBegin;
    }

    // The distance travelled is the length of the backslash run.
    std::size_t const runLength = pos - runBegin;
    return (runLength & 1u) == 1u;
}
490+
491+
/* Result of splitting the literal prefix off a glob pattern.

    Both members are value-initialized by default so that a
    default-constructed object never carries an indeterminate size.
 */
struct PrefixInfo
{
    // The unescaped prefix: the literal characters, with the
    // backslashes that escaped them removed.
    std::string prefix;

    // The encoded prefix size: how many characters of the original
    // pattern (escape backslashes included) the prefix covers.
    std::size_t prefixSize = 0;
};
499+
500+
501+
/* Extract the prefix of a glob pattern up to the first non-escaped metacharacter.
502+
503+
@param pattern The glob pattern.
504+
@return The prefix of the pattern.
505+
*/
506+
PrefixInfo
507+
extractPrefix(std::string_view pattern)
508+
{
509+
PrefixInfo result;
510+
std::size_t pos = 0;
511+
512+
while (pos < pattern.size())
513+
{
514+
// Find the first metacharacter or backslash
515+
std::size_t const metacharPos = pattern.find_first_of("?*[{\\", pos);
516+
517+
// Copy all literal characters up to the metacharacter
518+
result.prefix.append(pattern.substr(pos, metacharPos - pos));
519+
pos = metacharPos;
520+
521+
if (pos == std::string_view::npos)
522+
{
523+
// No more characters, we're done
524+
result.prefixSize = pattern.size();
525+
return result;
526+
}
527+
528+
if (char const c = pattern[pos];
529+
c == '\\' &&
530+
pos + 1 < pattern.size())
531+
{
532+
// Push the escaped character instead of the backslash
533+
result.prefix += pattern[pos + 1];
534+
pos += 2;
535+
}
536+
else if (c == '?' || c == '*' || c == '[' || c == '{')
537+
{
538+
// If it's escaped, it should have been handled in the
539+
// previous case.
540+
MRDOCS_ASSERT(!isEscaped(pattern, pos));
541+
break;
542+
}
543+
else
544+
{
545+
// Handle a backslash that is not escaping anything
546+
result.prefix += c;
547+
++pos;
548+
}
549+
}
550+
result.prefixSize = pos;
551+
return result;
552+
}
553+
470554
} // (anon)
471555

472556
struct GlobPattern::Impl {
@@ -492,12 +576,12 @@ create(
492576
// Store the original pattern.
493577
res.impl_->pattern = std::string(pattern);
494578

495-
// Store the prefix that does not contain any metacharacter.
496-
std::size_t const prefixSize = pattern.find_first_of("?*[{\\");
497-
res.impl_->prefix = pattern.substr(0, prefixSize);
498-
if (prefixSize == std::string::npos)
579+
// Store the pattern literal prefix.
580+
auto [prefix, prefixSize] = extractPrefix(pattern);
581+
res.impl_->prefix = std::move(prefix);
582+
if (prefixSize == pattern.size())
499583
{
500-
// The pattern does not contain any metacharacter.
584+
// The pattern does not contain any unescaped metacharacter.
501585
return res;
502586
}
503587

src/test/Support/Glob.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -384,10 +384,9 @@ struct Glob_test
384384
BOOST_TEST_NOT(glob.match("aab"));
385385
}
386386

387-
// invalid
387+
// stray \ becomes part of the literal prefix
388388
{
389-
// stray \ at the end
390-
BOOST_TEST_NOT(PathGlobPattern::create("a\\"));
389+
BOOST_TEST(PathGlobPattern::create("a\\"));
391390
}
392391
}
393392

@@ -582,6 +581,26 @@ struct Glob_test
582581
BOOST_TEST_NOT(glob.match("abcd"));
583582
BOOST_TEST_NOT(glob.match("a/b/c"));
584583
}
584+
585+
// literal string with escaped characters
586+
{
587+
auto globExp = PathGlobPattern::create("a\\*b");
588+
BOOST_TEST(globExp);
589+
PathGlobPattern const& glob = *globExp;
590+
BOOST_TEST(glob.isLiteral());
591+
BOOST_TEST(glob.match("a*b"));
592+
BOOST_TEST_NOT(glob.match("aab"));
593+
}
594+
595+
// literal string with all special characters escaped
596+
{
597+
auto globExp = PathGlobPattern::create("a\\*\\?\\[\\{\\}\\^\\!\\-\\]\\c");
598+
BOOST_TEST(globExp);
599+
PathGlobPattern const& glob = *globExp;
600+
BOOST_TEST(glob.isLiteral());
601+
BOOST_TEST(glob.match("a*?[{}^!-]c"));
602+
BOOST_TEST_NOT(glob.match("a"));
603+
}
585604
}
586605
}
587606
};

0 commit comments

Comments
 (0)