Skip to content

Commit 3527ee0

Browse files
committed
automata: add 'is_match' as its own path to meta regex internals
I originally prided myself on not having a dedicated `is_match` routine on the meta regex engine's internal `Strategy` trait, and actually paid a fair amount of attention to ensuring that `is_match` and `find` always returned the same results. That is, `is_match` returns true if and only if `find` returns a match. But the fix in the previous commits for #1059 means that a `PikeVM` and a `BoundedBacktracker` can be used to run a search with an NFA that has no capture states. Since both engines are implemented to only track offsets via those capture states, it follows that the only thing that can be returned in such cases is whether a match occurs (and if so, which pattern matched). That in turn means that `is_match` can return `true` while `find` can return `None` for the same search. This is because the latter returns `None` even when a match is found but there are no capture states to record the offsets of the match. This in theory could be resolved by adding APIs to the `PikeVM` and the `BoundedBacktracker` that return a `HalfMatch` without depending on any capture states at all. Then `is_match` could be implemented in terms of those APIs. That is probably the right path, but it's pretty gnarly to do without breaking changes and I don't want to do any breaking changes right now. So instead, we just add a special path to the meta regex engine for `is_match` and permit some cases to have different results between `is_match` and `find`. Sigh.
1 parent 9d86815 commit 3527ee0

File tree

3 files changed

+158
-3
lines changed

3 files changed

+158
-3
lines changed

regex-automata/src/meta/regex.rs

+8 -1
Original file line number | Diff line number | Diff line change
@@ -529,7 +529,14 @@ impl Regex {
529529
#[inline]
530530
pub fn is_match<'h, I: Into<Input<'h>>>(&self, input: I) -> bool {
531531
let input = input.into().earliest(true);
532-
self.search_half(&input).is_some()
532+
if self.imp.info.is_impossible(&input) {
533+
return false;
534+
}
535+
let mut guard = self.pool.get();
536+
let result = self.imp.strat.is_match(&mut guard, &input);
537+
// See 'Regex::search' for why we put the guard back explicitly.
538+
PoolGuard::put(guard);
539+
result
533540
}
534541

535542
/// Executes a leftmost search and returns the first match that is found,

regex-automata/src/meta/strategy.rs

+118 -2
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,8 @@ pub(super) trait Strategy:
5858
input: &Input<'_>,
5959
) -> Option<HalfMatch>;
6060

61+
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool;
62+
6163
fn search_slots(
6264
&self,
6365
cache: &mut Cache,
@@ -399,6 +401,10 @@ impl<P: PrefilterI> Strategy for Pre<P> {
399401
self.search(cache, input).map(|m| HalfMatch::new(m.pattern(), m.end()))
400402
}
401403

404+
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
405+
self.search(cache, input).is_some()
406+
}
407+
402408
fn search_slots(
403409
&self,
404410
cache: &mut Cache,
@@ -623,6 +629,29 @@ impl Core {
623629
}
624630
}
625631

632+
fn is_match_nofail(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
633+
if let Some(ref e) = self.onepass.get(input) {
634+
trace!(
635+
"using OnePass for is-match search at {:?}",
636+
input.get_span()
637+
);
638+
e.search_slots(&mut cache.onepass, input, &mut []).is_some()
639+
} else if let Some(ref e) = self.backtrack.get(input) {
640+
trace!(
641+
"using BoundedBacktracker for is-match search at {:?}",
642+
input.get_span()
643+
);
644+
e.is_match(&mut cache.backtrack, input)
645+
} else {
646+
trace!(
647+
"using PikeVM for is-match search at {:?}",
648+
input.get_span()
649+
);
650+
let e = self.pikevm.get();
651+
e.is_match(&mut cache.pikevm, input)
652+
}
653+
}
654+
626655
fn is_capture_search_needed(&self, slots_len: usize) -> bool {
627656
slots_len > self.nfa.group_info().implicit_slot_len()
628657
}
@@ -703,7 +732,7 @@ impl Strategy for Core {
703732
// The main difference with 'search' is that if we're using a DFA, we
704733
// can use a single forward scan without needing to run the reverse
705734
// DFA.
706-
return if let Some(e) = self.dfa.get(input) {
735+
if let Some(e) = self.dfa.get(input) {
707736
trace!("using full DFA for half search at {:?}", input.get_span());
708737
match e.try_search_half_fwd(input) {
709738
Ok(x) => x,
@@ -723,7 +752,38 @@ impl Strategy for Core {
723752
}
724753
} else {
725754
self.search_half_nofail(cache, input)
726-
};
755+
}
756+
}
757+
758+
#[cfg_attr(feature = "perf-inline", inline(always))]
759+
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
760+
if let Some(e) = self.dfa.get(input) {
761+
trace!(
762+
"using full DFA for is-match search at {:?}",
763+
input.get_span()
764+
);
765+
match e.try_search_half_fwd(input) {
766+
Ok(x) => x.is_some(),
767+
Err(_err) => {
768+
trace!("full DFA half search failed: {}", _err);
769+
self.is_match_nofail(cache, input)
770+
}
771+
}
772+
} else if let Some(e) = self.hybrid.get(input) {
773+
trace!(
774+
"using lazy DFA for is-match search at {:?}",
775+
input.get_span()
776+
);
777+
match e.try_search_half_fwd(&mut cache.hybrid, input) {
778+
Ok(x) => x.is_some(),
779+
Err(_err) => {
780+
trace!("lazy DFA half search failed: {}", _err);
781+
self.is_match_nofail(cache, input)
782+
}
783+
}
784+
} else {
785+
self.is_match_nofail(cache, input)
786+
}
727787
}
728788

729789
#[cfg_attr(feature = "perf-inline", inline(always))]
@@ -983,6 +1043,21 @@ impl Strategy for ReverseAnchored {
9831043
}
9841044
}
9851045

1046+
#[cfg_attr(feature = "perf-inline", inline(always))]
1047+
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1048+
if input.get_anchored().is_anchored() {
1049+
return self.core.is_match(cache, input);
1050+
}
1051+
match self.try_search_half_anchored_rev(cache, input) {
1052+
Err(_err) => {
1053+
trace!("fast reverse anchored search failed: {}", _err);
1054+
self.core.is_match_nofail(cache, input)
1055+
}
1056+
Ok(None) => false,
1057+
Ok(Some(_)) => true,
1058+
}
1059+
}
1060+
9861061
#[cfg_attr(feature = "perf-inline", inline(always))]
9871062
fn search_slots(
9881063
&self,
@@ -1335,6 +1410,28 @@ impl Strategy for ReverseSuffix {
13351410
}
13361411
}
13371412

1413+
#[cfg_attr(feature = "perf-inline", inline(always))]
1414+
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1415+
if input.get_anchored().is_anchored() {
1416+
return self.core.is_match(cache, input);
1417+
}
1418+
match self.try_search_half_start(cache, input) {
1419+
Err(RetryError::Quadratic(_err)) => {
1420+
trace!("reverse suffix half optimization failed: {}", _err);
1421+
self.core.is_match_nofail(cache, input)
1422+
}
1423+
Err(RetryError::Fail(_err)) => {
1424+
trace!(
1425+
"reverse suffix reverse fast half search failed: {}",
1426+
_err
1427+
);
1428+
self.core.is_match_nofail(cache, input)
1429+
}
1430+
Ok(None) => false,
1431+
Ok(Some(_)) => true,
1432+
}
1433+
}
1434+
13381435
#[cfg_attr(feature = "perf-inline", inline(always))]
13391436
fn search_slots(
13401437
&self,
@@ -1717,6 +1814,25 @@ impl Strategy for ReverseInner {
17171814
}
17181815
}
17191816

1817+
#[cfg_attr(feature = "perf-inline", inline(always))]
1818+
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1819+
if input.get_anchored().is_anchored() {
1820+
return self.core.is_match(cache, input);
1821+
}
1822+
match self.try_search_full(cache, input) {
1823+
Err(RetryError::Quadratic(_err)) => {
1824+
trace!("reverse inner half optimization failed: {}", _err);
1825+
self.core.is_match_nofail(cache, input)
1826+
}
1827+
Err(RetryError::Fail(_err)) => {
1828+
trace!("reverse inner fast half search failed: {}", _err);
1829+
self.core.is_match_nofail(cache, input)
1830+
}
1831+
Ok(None) => false,
1832+
Ok(Some(_)) => true,
1833+
}
1834+
}
1835+
17201836
#[cfg_attr(feature = "perf-inline", inline(always))]
17211837
fn search_slots(
17221838
&self,

regex-automata/src/meta/wrappers.rs

+32
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,15 @@ impl PikeVMEngine {
8787
Ok(PikeVMEngine(engine))
8888
}
8989

90+
#[cfg_attr(feature = "perf-inline", inline(always))]
91+
pub(crate) fn is_match(
92+
&self,
93+
cache: &mut PikeVMCache,
94+
input: &Input<'_>,
95+
) -> bool {
96+
self.0.is_match(cache.0.as_mut().unwrap(), input.clone())
97+
}
98+
9099
#[cfg_attr(feature = "perf-inline", inline(always))]
91100
pub(crate) fn search_slots(
92101
&self,
@@ -212,6 +221,29 @@ impl BoundedBacktrackerEngine {
212221
}
213222
}
214223

224+
#[cfg_attr(feature = "perf-inline", inline(always))]
225+
pub(crate) fn is_match(
226+
&self,
227+
cache: &mut BoundedBacktrackerCache,
228+
input: &Input<'_>,
229+
) -> bool {
230+
#[cfg(feature = "nfa-backtrack")]
231+
{
232+
// OK because we only permit access to this engine when we know
233+
// the haystack is short enough for the backtracker to run without
234+
// reporting an error.
235+
self.0
236+
.try_is_match(cache.0.as_mut().unwrap(), input.clone())
237+
.unwrap()
238+
}
239+
#[cfg(not(feature = "nfa-backtrack"))]
240+
{
241+
// Impossible to reach because this engine is never constructed
242+
// if the requisite features aren't enabled.
243+
unreachable!()
244+
}
245+
}
246+
215247
#[cfg_attr(feature = "perf-inline", inline(always))]
216248
pub(crate) fn search_slots(
217249
&self,

0 commit comments

Comments
 (0)