Skip to content

Commit b4c8073

Browse files
authored
Add text position support in Then() (#267)
* New Then() overload to use start/end of a result * Add text position support in Then()
1 parent e553de3 commit b4c8073

File tree

5 files changed

+85
-4
lines changed

5 files changed

+85
-4
lines changed

docs/parsers.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,7 @@ Convert the result of a parser. This is usually used to create custom data struc
796796
```c#
797797
Parser<U> Then<U>(Func<T, U> conversion)
798798
Parser<U> Then<U>(Func<ParseContext, T, U> conversion)
799+
Parser<U> Then<U>(Func<ParseContext, int, int, T, U> conversion)
799800
Parser<U> Then<U>(U value)
800801
Parser<U?> Then<U>() // Converts the result to `U`
801802
```
@@ -827,6 +828,28 @@ var parser = OneOf(
827828
);
828829
```
829830

831+
#### Accessing Start and End Positions
832+
833+
The `Func<ParseContext, int, int, T, U>` overload provides access to the start and end offsets of the parsed result:
834+
835+
```c#
836+
var parser = Literals.Identifier().Then((context, start, end, value) =>
837+
{
838+
var length = end - start;
839+
return $"Parsed '{value}' at offset {start}, length {length}";
840+
});
841+
842+
parser.Parse("hello");
843+
```
844+
845+
Result:
846+
847+
```
848+
"Parsed 'hello' at offset 0, length 5"
849+
```
850+
851+
> **Note:** The start and end parameters are integer offsets (positions in the input buffer), not `TextPosition` objects. For `Literals` parsers, these offsets correspond exactly to where the parser matched. For `Terms` parsers (which skip leading whitespace), the start offset differs between modes: in non-compiled mode it is the position after the skipped whitespace, whereas in compiled mode it is captured before the whitespace is skipped.
852+
830853
### Else
831854

832855
Returns a value if the previous parser failed.

src/Parlot/Fluent/Parser.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ public abstract partial class Parser<T>
1919
/// </summary>
2020
public Parser<U> Then<U>(Func<ParseContext, T, U> conversion) => new Then<T, U>(this, conversion);
2121

22+
/// <summary>
23+
/// Builds a parser that converts the previous result, passing the <see cref="ParseContext"/> and the start and end offsets of the parsed result to the conversion function.
24+
/// </summary>
25+
public Parser<U> Then<U>(Func<ParseContext, int, int, T, U> conversion) => new Then<T, U>(this, conversion);
26+
2227
/// <summary>
2328
/// Builds a parser that converts the previous result.
2429
/// </summary>

src/Parlot/Fluent/SkipWhiteSpace.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ public CompilationResult Compile(CompilationContext context)
6060
{
6161
var result = context.CreateCompilationResult<T>();
6262

63-
var start = context.DeclarePositionVariable(result);
63+
var startBeforeWhitespace = context.DeclarePositionVariable(result);
6464

6565
var parserCompileResult = Parser.Build(context);
6666

@@ -78,7 +78,7 @@ public CompilationResult Compile(CompilationContext context)
7878
Expression.Assign(result.Value, parserCompileResult.Value),
7979
Expression.Assign(result.Success, Expression.Constant(true, typeof(bool)))
8080
),
81-
context.ResetPosition(start)
81+
context.ResetPosition(startBeforeWhitespace)
8282
)
8383
)
8484
)

src/Parlot/Fluent/Then.cs

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ public sealed class Then<T, U> : Parser<U>, ICompilable, ISeekable
1616
{
1717
private readonly Func<T, U>? _action1;
1818
private readonly Func<ParseContext, T, U>? _action2;
19+
private readonly Func<ParseContext, int, int, T, U>? _action3;
1920
private readonly U? _value;
2021
private readonly Parser<T> _parser;
2122

@@ -41,6 +42,11 @@ public Then(Parser<T> parser, Func<ParseContext, T, U> action) : this(parser)
4142
_action2 = action ?? throw new ArgumentNullException(nameof(action));
4243
}
4344

45+
public Then(Parser<T> parser, Func<ParseContext, int, int, T, U> action) : this(parser)
46+
{
47+
_action3 = action ?? throw new ArgumentNullException(nameof(action));
48+
}
49+
4450
public Then(Parser<T> parser, U value) : this(parser)
4551
{
4652
_value = value;
@@ -68,9 +74,13 @@ public override bool Parse(ParseContext context, ref ParseResult<U> result)
6874
{
6975
result.Set(parsed.Start, parsed.End, _action2.Invoke(context, parsed.Value));
7076
}
77+
else if (_action3 != null)
78+
{
79+
result.Set(parsed.Start, parsed.End, _action3.Invoke(context, parsed.Start, parsed.End, parsed.Value));
80+
}
7181
else
7282
{
73-
// _value can't be null if action1 and action2 are null
83+
// _value can't be null if action1, action2, and action3 are null
7484
result.Set(parsed.Start, parsed.End, _value!);
7585
}
7686

@@ -88,12 +98,27 @@ public CompilationResult Compile(CompilationContext context)
8898

8999
// parse1 instructions
90100
//
101+
// var startOffset = context.Scanner.Cursor.Offset; // Only for _action3
102+
// parser1 body (which may include whitespace skipping for Terms)
91103
// if (parser1.Success)
92104
// {
105+
// var endOffset = context.Scanner.Cursor.Offset; // Only for _action3
93106
// success = true;
94-
// value = action(parse1.Value);
107+
// value = action(parse1.Value) // or action(context, start, end, parse1.Value) for _action3
95108
// }
96109

110+
ParameterExpression? startOffset = null;
111+
ParameterExpression? endOffset = null;
112+
113+
if (_action3 != null)
114+
{
115+
// Capture the start offset before the parser runs
116+
// Note: For Terms parsers (which skip whitespace), this will be before whitespace is skipped
117+
// This differs from non-compiled mode where parsed.Start is after whitespace skipping
118+
startOffset = result.DeclareVariable<int>($"startOffset{context.NextNumber}", context.Offset());
119+
endOffset = result.DeclareVariable<int>($"endOffset{context.NextNumber}");
120+
}
121+
97122
var parserCompileResult = _parser.Build(context, requireResult: true);
98123

99124
Expression assignValue;
@@ -110,6 +135,16 @@ public CompilationResult Compile(CompilationContext context)
110135
? Expression.Invoke(Expression.Constant(_action2), [context.ParseContext, parserCompileResult.Value])
111136
: Expression.Assign(result.Value, Expression.Invoke(Expression.Constant(_action2), [context.ParseContext, parserCompileResult.Value]));
112137
}
138+
else if (_action3 != null)
139+
{
140+
// Capture end offset when parser succeeds, then invoke the action
141+
assignValue = Expression.Block(
142+
Expression.Assign(endOffset!, context.Offset()),
143+
context.DiscardResult
144+
? Expression.Invoke(Expression.Constant(_action3), [context.ParseContext, startOffset!, endOffset!, parserCompileResult.Value])
145+
: Expression.Assign(result.Value, Expression.Invoke(Expression.Constant(_action3), [context.ParseContext, startOffset!, endOffset!, parserCompileResult.Value]))
146+
);
147+
}
113148
else
114149
{
115150
assignValue = context.DiscardResult

test/Parlot.Tests/FluentTests.cs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,24 @@ public void ThenShouldOnlyBeInvokedIfParserSucceeded()
169169
Assert.True(invoked);
170170
}
171171

172+
[Fact]
173+
public void ThenShouldProvideStartAndEndOffsets()
174+
{
175+
// Use Literals for consistent behavior between compiled and non-compiled modes
176+
var parser = Literals.Identifier().Then((context, start, end, value) =>
177+
{
178+
return $"{value}:{start}-{end}";
179+
});
180+
181+
Assert.True(parser.TryParse("hello", out var result));
182+
Assert.Equal("hello:0-5", result);
183+
184+
// Test with compiled parser - should have the same behavior
185+
var compiled = parser.Compile();
186+
Assert.True(compiled.TryParse("world", out var result2));
187+
Assert.Equal("world:0-5", result2);
188+
}
189+
172190
[Fact]
173191
public void BetweenShouldParseBetweenTwoString()
174192
{

0 commit comments

Comments
 (0)