dotnet · carlossanlop · Aug 31, 2022 · Aug 28, 2022 · Aug 28, 2022 · Aug 29, 2022
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
@@ -1606,22 +1606,33 @@ static bool CanCombineCounts(int nodeMin, int nodeMax, int nextMin, int nextMax)
                         // Coalescing a loop with its same type
                         case RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy when nextNode.Kind == currentNode.Kind && currentNode.Ch == nextNode.Ch:
                         case RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy when nextNode.Kind == currentNode.Kind && currentNode.Str == nextNode.Str:
-                            if (CanCombineCounts(currentNode.M, currentNode.N, nextNode.M, nextNode.N))
+                            if (nextNode.M > 0 &&
+                                currentNode.Kind is RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Setloopatomic)
                             {
-                                currentNode.M += nextNode.M;
-                                if (currentNode.N != int.MaxValue)
-                                {
-                                    currentNode.N = nextNode.N == int.MaxValue ? int.MaxValue : currentNode.N + nextNode.N;
-                                }
-                                next++;
-                                continue;
+                                // Atomic loops can only be combined if the second loop has no lower bound, as if it has a lower bound,
+                                // combining them changes behavior. Uncombined, the first loop can consume all matching items;
+                                // the second loop might then not be able to meet its minimum and fail.  But if they're combined, the combined
+                                // minimum of the sole loop could now be met, introducing matches where there shouldn't have been any.
+                                break;
                             }
-                            break;
+
+                            if (!CanCombineCounts(currentNode.M, currentNode.N, nextNode.M, nextNode.N))
+                            {
+                                break;
+                            }
+
+                            currentNode.M += nextNode.M;
+                            if (currentNode.N != int.MaxValue)
+                            {
+                                currentNode.N = nextNode.N == int.MaxValue ? int.MaxValue : currentNode.N + nextNode.N;
+                            }
+                            next++;
+                            continue;
 
                         // Coalescing a loop with an additional item of the same type
-                        case RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.One && currentNode.Ch == nextNode.Ch:
-                        case RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy when nextNode.Kind == RegexNodeKind.Notone && currentNode.Ch == nextNode.Ch:
-                        case RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy when nextNode.Kind == RegexNodeKind.Set && currentNode.Str == nextNode.Str:
+                        case RegexNodeKind.Oneloop or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.One && currentNode.Ch == nextNode.Ch:
+                        case RegexNodeKind.Notoneloop or RegexNodeKind.Notonelazy when nextNode.Kind == RegexNodeKind.Notone && currentNode.Ch == nextNode.Ch:
+                        case RegexNodeKind.Setloop or RegexNodeKind.Setlazy when nextNode.Kind == RegexNodeKind.Set && currentNode.Str == nextNode.Str:
                             if (CanCombineCounts(currentNode.M, currentNode.N, 1, 1))
                             {
                                 currentNode.M++;
@@ -1635,7 +1646,7 @@ static bool CanCombineCounts(int nodeMin, int nodeMax, int nextMin, int nextMax)
                             break;
 
                         // Coalescing a loop with a subsequent string
-                        case RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.Multi && currentNode.Ch == nextNode.Str![0]:
+                        case RegexNodeKind.Oneloop or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.Multi && currentNode.Ch == nextNode.Str![0]:
                             {
                                 // Determine how many of the multi's characters can be combined.
                                 // We already checked for the first, so we know it's at least one.
@@ -2056,15 +2067,15 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i
                             case RegexNodeKind.Multi when node.Ch != subsequent.Str![0]:
                             case RegexNodeKind.End:
                             case RegexNodeKind.EndZ or RegexNodeKind.Eol when node.Ch != '\n':
-                            case RegexNodeKind.Boundary when RegexCharClass.IsBoundaryWordChar(node.Ch):
-                            case RegexNodeKind.NonBoundary when !RegexCharClass.IsBoundaryWordChar(node.Ch):
-                            case RegexNodeKind.ECMABoundary when RegexCharClass.IsECMAWordChar(node.Ch):
-                            case RegexNodeKind.NonECMABoundary when !RegexCharClass.IsECMAWordChar(node.Ch):
                                 return true;
 
                             case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M == 0 && node.Ch != subsequent.Ch:
                             case RegexNodeKind.Notonelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic when subsequent.M == 0 && node.Ch == subsequent.Ch:
                             case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M == 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
+                            case RegexNodeKind.Boundary when RegexCharClass.IsBoundaryWordChar(node.Ch):
+                            case RegexNodeKind.NonBoundary when !RegexCharClass.IsBoundaryWordChar(node.Ch):
+                            case RegexNodeKind.ECMABoundary when RegexCharClass.IsECMAWordChar(node.Ch):
+                            case RegexNodeKind.NonECMABoundary when !RegexCharClass.IsECMAWordChar(node.Ch):
                                 // The loop can be made atomic based on this subsequent node, but we'll need to evaluate the next one as well.
                                 break;
 
@@ -2103,14 +2114,14 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i
                             case RegexNodeKind.Multi when !RegexCharClass.CharInClass(subsequent.Str![0], node.Str!):
                             case RegexNodeKind.End:
                             case RegexNodeKind.EndZ or RegexNodeKind.Eol when !RegexCharClass.CharInClass('\n', node.Str!):
-                            case RegexNodeKind.Boundary when node.Str is RegexCharClass.WordClass or RegexCharClass.DigitClass:
-                            case RegexNodeKind.NonBoundary when node.Str is RegexCharClass.NotWordClass or RegexCharClass.NotDigitClass:
-                            case RegexNodeKind.ECMABoundary when node.Str is RegexCharClass.ECMAWordClass or RegexCharClass.ECMADigitClass:
-                            case RegexNodeKind.NonECMABoundary when node.Str is RegexCharClass.NotECMAWordClass or RegexCharClass.NotDigitClass:
                                 return true;
 
                             case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M == 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                             case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M == 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
+                            case RegexNodeKind.Boundary when node.Str is RegexCharClass.WordClass or RegexCharClass.DigitClass:
+                            case RegexNodeKind.NonBoundary when node.Str is RegexCharClass.NotWordClass or RegexCharClass.NotDigitClass:
+                            case RegexNodeKind.ECMABoundary when node.Str is RegexCharClass.ECMAWordClass or RegexCharClass.ECMADigitClass:
+                            case RegexNodeKind.NonECMABoundary when node.Str is RegexCharClass.NotECMAWordClass or RegexCharClass.NotDigitClass:
                                 // The loop can be made atomic based on this subsequent node, but we'll need to evaluate the next one as well.
                                 break;
 
@@ -2125,8 +2136,6 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i
 
                 // We only get here if the node could be made atomic based on subsequent but subsequent has a lower bound of zero
                 // and thus we need to move subsequent to be the next node in sequence and loop around to try again.
-                Debug.Assert(subsequent.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy);
-                Debug.Assert(subsequent.M == 0);
                 if (!iterateNullableSubsequent)
                 {
                     return false;

diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
@@ -149,6 +149,15 @@ public static IEnumerable<object[]> Match_MemberData()
                     yield return (Case("(?>[^z]+)z"), "zzzzxyxyxyz123", options, 4, 9, true, "xyxyxyz");
                     yield return (Case("(?>(?>[^z]+))z"), "zzzzxyxyxyz123", options, 4, 9, true, "xyxyxyz");
                     yield return (Case("(?>[^z]*)z123"), "zzzzxyxyxyz123", options, 4, 10, true, "xyxyxyz123");
+                    yield return (Case("(?>a*)a"), "aaa", options, 0, 3, false, "");
+                    yield return (Case("(?>a*)a+"), "aaa", options, 0, 3, false, "");
+                    yield return (Case("(?>a+)a+"), "aaa", options, 0, 3, false, "");
+                    yield return (Case("(?>.*)."), "aaa", options, 0, 3, false, "");
+                    yield return (Case("(?>.*).+"), "aaa", options, 0, 3, false, "");
+                    yield return (Case("(?>.+).+"), "aaa", options, 0, 3, false, "");
+                    yield return (Case("(?>\\w*)\\w"), "aaa", options, 0, 3, false, "");
+                    yield return (Case("(?>\\w*)\\w+"), "aaa", options, 0, 3, false, "");
+                    yield return (Case("(?>\\w+)\\w+"), "aaa", options, 0, 3, false, "");
 
                     yield return (Case("(?>[^12]+)1"), "121231", options, 0, 6, true, "31");
                     yield return (Case("(?>[^123]+)1"), "12312341", options, 0, 8, true, "41");

diff --git a/...aries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs b/...aries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs
@@ -276,6 +276,22 @@ public static IEnumerable<object[]> Matches_TestData()
                      }
                 };
 
+                yield return new object[]
+                {
+                    engine,
+                    @"\w*\b\w+", "abc def ghij kl m nop qrstuv", RegexOptions.None,
+                    new[]
+                    {
+                        new CaptureData("abc", 0, 3),
+                        new CaptureData("def", 4, 3),
+                        new CaptureData("ghij", 8, 4),
+                        new CaptureData("kl", 13, 2),
+                        new CaptureData("m", 16, 1),
+                        new CaptureData("nop", 18, 3),
+                        new CaptureData("qrstuv", 22, 6),
+                    }
+                };
+
                 if (!PlatformDetection.IsNetFramework)
                 {
                     // .NET Framework missing fix in https://github.com/dotnet/runtime/pull/1075
@@ -294,6 +310,20 @@ public static IEnumerable<object[]> Matches_TestData()
 
                 if (!RegexHelpers.IsNonBacktracking(engine))
                 {
+                    yield return new object[]
+                    {
+                        engine,
+                        @"(\b(?!ab|nop)\w*\b)\w+", "abc def ghij kl m nop qrstuv", RegexOptions.None,
+                        new[]
+                        {
+                            new CaptureData("def", 4, 3),
+                            new CaptureData("ghij", 8, 4),
+                            new CaptureData("kl", 13, 2),
+                            new CaptureData("m", 16, 1),
+                            new CaptureData("qrstuv", 22, 6),
+                        }
+                    };
+
                     yield return new object[]
                     {
                         engine,