Skip to content

Commit 6effc01

Browse files
committed
Simplify streaming patch boundary parsing
1 parent 2d9f8bf commit 6effc01

File tree

1 file changed

+173
-76
lines changed

1 file changed

+173
-76
lines changed

codex-rs/apply-patch/src/parser.rs

Lines changed: 173 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -185,60 +185,30 @@ enum ParseMode {
185185
Streaming,
186186
}
187187

188-
struct PatchScope<'a> {
189-
lines: &'a [&'a str],
190-
body_end_index: usize,
191-
allow_incomplete_final_hunk: bool,
192-
}
193-
194-
impl<'a> PatchScope<'a> {
195-
fn complete(lines: &'a [&'a str]) -> Self {
196-
Self {
197-
lines,
198-
body_end_index: lines.len().saturating_sub(1),
199-
allow_incomplete_final_hunk: false,
200-
}
201-
}
202-
203-
fn streaming(lines: &'a [&'a str]) -> Self {
204-
let body_end_index = if lines
205-
.last()
206-
.is_some_and(|line| line.trim() == END_PATCH_MARKER)
207-
{
208-
lines.len().saturating_sub(1)
209-
} else {
210-
lines.len()
211-
};
212-
Self {
213-
lines,
214-
body_end_index,
215-
allow_incomplete_final_hunk: true,
216-
}
217-
}
218-
}
219-
220188
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, ParseError> {
221189
let lines: Vec<&str> = patch.trim().lines().collect();
222-
let scope = match check_patch_boundaries_strict(&lines) {
223-
Ok(scope) => scope,
224-
Err(e) => match mode {
225-
ParseMode::Strict => {
226-
return Err(e);
227-
}
228-
ParseMode::Lenient => check_patch_boundaries_lenient(&lines, e)?,
229-
ParseMode::Streaming => check_patch_boundaries_streaming(&lines, e)?,
230-
},
190+
let (patch_lines, hunk_lines) = match mode {
191+
ParseMode::Strict => check_patch_boundaries_strict(&lines)?,
192+
ParseMode::Lenient => check_patch_boundaries_lenient(&lines)?,
193+
ParseMode::Streaming => check_patch_boundaries_streaming(&lines)?,
231194
};
232195

233196
let mut hunks: Vec<Hunk> = Vec::new();
234-
let mut remaining_lines = &scope.lines[1..scope.body_end_index];
197+
let mut remaining_lines = hunk_lines;
235198
let mut line_number = 2;
236199
while !remaining_lines.is_empty() {
237-
let parsed_hunk = parse_one_hunk(remaining_lines, line_number);
200+
let parsed_hunk = parse_one_hunk(
201+
remaining_lines,
202+
line_number,
203+
match mode {
204+
ParseMode::Streaming => IncompleteFinalChunk::Allow,
205+
ParseMode::Strict | ParseMode::Lenient => IncompleteFinalChunk::Deny,
206+
},
207+
);
238208
let (hunk, hunk_lines) = match parsed_hunk {
239209
Ok(parsed) => parsed,
240210
// Return accumulated hunks even if parsing the last one failed in streaming mode.
241-
Err(err) if scope.allow_incomplete_final_hunk => {
211+
Err(err) if matches!(mode, ParseMode::Streaming) => {
242212
if hunks.is_empty() {
243213
return Err(err);
244214
}
@@ -250,7 +220,7 @@ fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, Pars
250220
line_number += hunk_lines;
251221
remaining_lines = &remaining_lines[hunk_lines..]
252222
}
253-
let patch = scope.lines.join("\n");
223+
let patch = patch_lines.join("\n");
254224
Ok(ApplyPatchArgs {
255225
hunks,
256226
patch,
@@ -260,26 +230,35 @@ fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, Pars
260230

261231
fn check_patch_boundaries_streaming<'a>(
262232
original_lines: &'a [&'a str],
263-
original_parse_error: ParseError,
264-
) -> Result<PatchScope<'a>, ParseError> {
233+
) -> Result<(&'a [&'a str], &'a [&'a str]), ParseError> {
265234
match original_lines {
266235
[first, ..] if first.trim() == BEGIN_PATCH_MARKER => {
267-
Ok(PatchScope::streaming(original_lines))
236+
let body_lines = if original_lines
237+
.last()
238+
.is_some_and(|line| line.trim() == END_PATCH_MARKER)
239+
{
240+
&original_lines[1..original_lines.len() - 1]
241+
} else {
242+
&original_lines[1..]
243+
};
244+
Ok((original_lines, body_lines))
268245
}
269-
_ => Err(original_parse_error),
246+
_ => check_patch_boundaries_strict(original_lines),
270247
}
271248
}
272249

273250
/// Checks the start and end lines of the patch text for `apply_patch`,
274251
/// returning an error if they do not match the expected markers.
275-
fn check_patch_boundaries_strict<'a>(lines: &'a [&'a str]) -> Result<PatchScope<'a>, ParseError> {
252+
fn check_patch_boundaries_strict<'a>(
253+
lines: &'a [&'a str],
254+
) -> Result<(&'a [&'a str], &'a [&'a str]), ParseError> {
276255
let (first_line, last_line) = match lines {
277256
[] => (None, None),
278257
[first] => (Some(first), Some(first)),
279258
[first, .., last] => (Some(first), Some(last)),
280259
};
281260
check_start_and_end_lines_strict(first_line, last_line)?;
282-
Ok(PatchScope::complete(lines))
261+
Ok((lines, &lines[1..lines.len() - 1]))
283262
}
284263

285264
/// If we are in lenient mode, we check if the first line starts with `<<EOF`
@@ -291,8 +270,12 @@ fn check_patch_boundaries_strict<'a>(lines: &'a [&'a str]) -> Result<PatchScope<
291270
/// contents, excluding the heredoc markers.
292271
fn check_patch_boundaries_lenient<'a>(
293272
original_lines: &'a [&'a str],
294-
original_parse_error: ParseError,
295-
) -> Result<PatchScope<'a>, ParseError> {
273+
) -> Result<(&'a [&'a str], &'a [&'a str]), ParseError> {
274+
let original_parse_error = match check_patch_boundaries_strict(original_lines) {
275+
Ok(lines) => return Ok(lines),
276+
Err(e) => e,
277+
};
278+
296279
match original_lines {
297280
[first, .., last] => {
298281
if (first == &"<<EOF" || first == &"<<'EOF'" || first == &"<<\"EOF\"")
@@ -329,9 +312,19 @@ fn check_start_and_end_lines_strict(
329312
}
330313
}
331314

315+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
316+
enum IncompleteFinalChunk {
317+
Allow,
318+
Deny,
319+
}
320+
332321
/// Attempts to parse a single hunk from the start of lines.
333322
/// Returns the parsed hunk and the number of lines parsed (or a ParseError).
334-
fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {
323+
fn parse_one_hunk(
324+
lines: &[&str],
325+
line_number: usize,
326+
incomplete_final_chunk: IncompleteFinalChunk,
327+
) -> Result<(Hunk, usize), ParseError> {
335328
// Be tolerant of case mismatches and extra padding around marker strings.
336329
let first_line = lines[0].trim();
337330
if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) {
@@ -391,11 +384,30 @@ fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), P
391384
break;
392385
}
393386

394-
let (chunk, chunk_lines) = parse_update_file_chunk(
387+
if incomplete_final_chunk == IncompleteFinalChunk::Allow && remaining_lines[0] == "@" {
388+
break;
389+
}
390+
391+
let parsed_chunk = parse_update_file_chunk(
395392
remaining_lines,
396393
line_number + parsed_lines,
397394
chunks.is_empty(),
398-
)?;
395+
);
396+
let (chunk, chunk_lines) = match parsed_chunk {
397+
Ok(parsed) => parsed,
398+
Err(err)
399+
if incomplete_final_chunk == IncompleteFinalChunk::Allow
400+
&& !chunks.is_empty()
401+
&& matches!(
402+
&err,
403+
InvalidHunkError { message, .. }
404+
if message == "Update hunk does not contain any lines"
405+
) =>
406+
{
407+
break;
408+
}
409+
Err(err) => return Err(err),
410+
};
399411
chunks.push(chunk);
400412
parsed_lines += chunk_lines;
401413
remaining_lines = &remaining_lines[chunk_lines..]
@@ -593,16 +605,43 @@ fn test_parse_patch_streaming() {
593605
fn test_parse_patch_streaming_large_patch_by_character() {
594606
let patch = "\
595607
*** Begin Patch
596-
*** Add File: added.txt
597-
+alpha
598-
+beta
599-
*** Update File: updated.txt
600-
*** Move to: moved.txt
601-
@@
602-
-old
603-
+new
604-
unchanged
605-
*** Delete File: deleted.txt
608+
*** Add File: docs/release-notes.md
609+
+# Release notes
610+
+
611+
+## CLI
612+
+- Surface apply_patch progress while arguments stream.
613+
+- Keep final patch application gated on the completed tool call.
614+
+- Include file summaries in the progress event payload.
615+
*** Update File: src/config.rs
616+
@@ impl Config
617+
- pub apply_patch_progress: bool,
618+
+ pub stream_apply_patch_progress: bool,
619+
pub include_diagnostics: bool,
620+
@@ fn default_progress_interval()
621+
- Duration::from_millis(500)
622+
+ Duration::from_millis(250)
623+
*** Delete File: src/legacy_patch_progress.rs
624+
*** Update File: crates/cli/src/main.rs
625+
*** Move to: crates/cli/src/bin/codex.rs
626+
@@ fn run()
627+
- let args = Args::parse();
628+
- dispatch(args)
629+
+ let cli = Cli::parse();
630+
+ dispatch(cli)
631+
*** Add File: tests/fixtures/apply_patch_progress.json
632+
+{
633+
+ \"type\": \"apply_patch_progress\",
634+
+ \"hunks\": [
635+
+ { \"operation\": \"add\", \"path\": \"docs/release-notes.md\" },
636+
+ { \"operation\": \"update\", \"path\": \"src/config.rs\" }
637+
+ ]
638+
+}
639+
*** Update File: README.md
640+
@@ Development workflow
641+
Build the Rust workspace before opening a pull request.
642+
+When touching streamed tool calls, include parser coverage for partial input.
643+
+Prefer tests that exercise the exact event payload shape.
644+
*** Delete File: docs/old-apply-patch-progress.md
606645
*** End Patch";
607646

608647
let mut max_hunk_count = 0;
@@ -622,27 +661,81 @@ fn test_parse_patch_streaming_large_patch_by_character() {
622661
}
623662
}
624663

625-
assert_eq!(saw_hunk_counts, vec![1, 2, 3]);
664+
assert_eq!(saw_hunk_counts, vec![1, 2, 3, 4, 5, 6, 7]);
626665
assert_eq!(
627666
parse_patch_streaming(patch),
628667
Ok(ApplyPatchArgs {
629668
hunks: vec![
630669
AddFile {
631-
path: PathBuf::from("added.txt"),
632-
contents: "alpha\nbeta\n".to_string(),
670+
path: PathBuf::from("docs/release-notes.md"),
671+
contents: "# Release notes\n\n## CLI\n- Surface apply_patch progress while arguments stream.\n- Keep final patch application gated on the completed tool call.\n- Include file summaries in the progress event payload.\n".to_string(),
672+
},
673+
UpdateFile {
674+
path: PathBuf::from("src/config.rs"),
675+
move_path: None,
676+
chunks: vec![
677+
UpdateFileChunk {
678+
change_context: Some("impl Config".to_string()),
679+
old_lines: vec![
680+
" pub apply_patch_progress: bool,".to_string(),
681+
" pub include_diagnostics: bool,".to_string(),
682+
],
683+
new_lines: vec![
684+
" pub stream_apply_patch_progress: bool,".to_string(),
685+
" pub include_diagnostics: bool,".to_string(),
686+
],
687+
is_end_of_file: false,
688+
},
689+
UpdateFileChunk {
690+
change_context: Some("fn default_progress_interval()".to_string()),
691+
old_lines: vec![" Duration::from_millis(500)".to_string()],
692+
new_lines: vec![" Duration::from_millis(250)".to_string()],
693+
is_end_of_file: false,
694+
},
695+
],
696+
},
697+
DeleteFile {
698+
path: PathBuf::from("src/legacy_patch_progress.rs"),
699+
},
700+
UpdateFile {
701+
path: PathBuf::from("crates/cli/src/main.rs"),
702+
move_path: Some(PathBuf::from("crates/cli/src/bin/codex.rs")),
703+
chunks: vec![UpdateFileChunk {
704+
change_context: Some("fn run()".to_string()),
705+
old_lines: vec![
706+
" let args = Args::parse();".to_string(),
707+
" dispatch(args)".to_string(),
708+
],
709+
new_lines: vec![
710+
" let cli = Cli::parse();".to_string(),
711+
" dispatch(cli)".to_string(),
712+
],
713+
is_end_of_file: false,
714+
}],
715+
},
716+
AddFile {
717+
path: PathBuf::from("tests/fixtures/apply_patch_progress.json"),
718+
contents: "{\n \"type\": \"apply_patch_progress\",\n \"hunks\": [\n { \"operation\": \"add\", \"path\": \"docs/release-notes.md\" },\n { \"operation\": \"update\", \"path\": \"src/config.rs\" }\n ]\n}\n".to_string(),
633719
},
634720
UpdateFile {
635-
path: PathBuf::from("updated.txt"),
636-
move_path: Some(PathBuf::from("moved.txt")),
721+
path: PathBuf::from("README.md"),
722+
move_path: None,
637723
chunks: vec![UpdateFileChunk {
638-
change_context: None,
639-
old_lines: vec!["old".to_string(), "unchanged".to_string()],
640-
new_lines: vec!["new".to_string(), "unchanged".to_string()],
724+
change_context: Some("Development workflow".to_string()),
725+
old_lines: vec![
726+
"Build the Rust workspace before opening a pull request.".to_string(),
727+
],
728+
new_lines: vec![
729+
"Build the Rust workspace before opening a pull request.".to_string(),
730+
"When touching streamed tool calls, include parser coverage for partial input."
731+
.to_string(),
732+
"Prefer tests that exercise the exact event payload shape.".to_string(),
733+
],
641734
is_end_of_file: false,
642735
}],
643736
},
644737
DeleteFile {
645-
path: PathBuf::from("deleted.txt"),
738+
path: PathBuf::from("docs/old-apply-patch-progress.md"),
646739
},
647740
],
648741
patch: patch.to_string(),
@@ -992,7 +1085,11 @@ fn test_parse_patch_lenient() {
9921085
#[test]
9931086
fn test_parse_one_hunk() {
9941087
assert_eq!(
995-
parse_one_hunk(&["bad"], /*line_number*/ 234),
1088+
parse_one_hunk(
1089+
&["bad"],
1090+
/*line_number*/ 234,
1091+
IncompleteFinalChunk::Deny
1092+
),
9961093
Err(InvalidHunkError {
9971094
message: "'bad' is not a valid hunk header. \
9981095
Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'".to_string(),

0 commit comments

Comments
 (0)