diff --git a/diff/reverse.go b/diff/reverse.go new file mode 100644 index 0000000..7ceac7d --- /dev/null +++ b/diff/reverse.go @@ -0,0 +1,151 @@ +package diff + +import ( + "bytes" + "errors" + "fmt" + "regexp" +) + +// ReverseFileDiff takes a diff.FileDiff, and returns the reverse operation. +// This is a FileDiff that undoes the edit of the original. +func ReverseFileDiff(fd *FileDiff) (*FileDiff, error) { + reverse := FileDiff{ + OrigName: fd.NewName, + OrigTime: fd.NewTime, + NewName: fd.OrigName, + NewTime: fd.OrigTime, + Extended: fd.Extended, + } + for _, hunk := range fd.Hunks { + invHunk, err := reverseHunk(hunk) + if err != nil { + return nil, err + } + reverse.Hunks = append(reverse.Hunks, invHunk) + } + return &reverse, nil +} + +// ReverseMultiFileDiff reverses a series of FileDiffs. +func ReverseMultiFileDiff(fds []*FileDiff) ([]*FileDiff, error) { + var reverse []*FileDiff + for _, fd := range fds { + r, err := ReverseFileDiff(fd) + if err != nil { + return nil, err + } + reverse = append(reverse, r) + } + return reverse, nil +} + +// A subhunk represents a portion of a Hunk.Body, split into three sections. +// It consists of zero or more context lines, followed by zero or more orig +// lines and then zero or more new lines. +// +// Each line is stored WITHOUT its starting character, but with the newlines +// included. The final entry in a section may be missing a trailing newline. +// +// A missing newline in orig is represented in a Hunk by OrigNoNewlineAt, +// but is represented here as a missing newline. +type subhunk struct { + context [][]byte + orig [][]byte + new [][]byte +} + +// reverseHunk converts a Hunk into its reverse operation. +func reverseHunk(forward *Hunk) (*Hunk, error) { + reverse := Hunk{ + OrigStartLine: forward.NewStartLine, + OrigLines: forward.NewLines, + OrigNoNewlineAt: 0, // we may change this below + NewStartLine: forward.OrigStartLine, + NewLines: forward.OrigLines, + Section: forward.Section, + StartPosition: forward.StartPosition, + } + subs, err := toSubhunks(forward) + if err != nil { + return nil, err + } + for _, sub := range subs { + invSub := subhunk{ + context: sub.context, + orig: sub.new, + new: sub.orig, + } + for _, line := range invSub.context { + reverse.Body = append(reverse.Body, ' ') + reverse.Body = append(reverse.Body, line...) + } + for _, line := range invSub.orig { + reverse.Body = append(reverse.Body, '-') + reverse.Body = append(reverse.Body, line...) + } + if len(invSub.orig) > 0 && reverse.Body[len(reverse.Body)-1] != '\n' { + // There was a missing newline in `orig`, which we encode in a + // hunk with an offset. + reverse.Body = append(reverse.Body, '\n') + reverse.OrigNoNewlineAt = int32(len(reverse.Body)) + } + for _, line := range invSub.new { + reverse.Body = append(reverse.Body, '+') + reverse.Body = append(reverse.Body, line...) + } + } + return &reverse, nil +} + +var subhunkLineRe = regexp.MustCompile(`^.[^\n]*(\n|$)`) + +func extractLinesStartingWith(from *[]byte, startingWith byte) [][]byte { + var lines [][]byte + for len(*from) > 0 && (*from)[0] == startingWith { + line := subhunkLineRe.Find(*from) + lines = append(lines, line[1:]) + *from = (*from)[len(line):] + } + return lines +} + +// Extracts the subhunks from a diff.Hunk. +// +// This groups a Hunk's buffer into one or more subhunks, matching the conditions +// of `subhunk` above. This function groups, strips prefix characters, and strips +// a newline for `OrigNoNewlineAt` if necessary. +func toSubhunks(hunk *Hunk) ([]subhunk, error) { + var body []byte = hunk.Body + var subhunks []subhunk + if len(body) == 0 { + return nil, nil + } + for len(body) > 0 { + sh := subhunk{ + context: extractLinesStartingWith(&body, ' '), + orig: extractLinesStartingWith(&body, '-'), + new: extractLinesStartingWith(&body, '+'), + } + if len(sh.context) == 0 && len(sh.orig) == 0 && len(sh.new) == 0 { + // The first line didn't start with any expected prefix. + return nil, fmt.Errorf("unexpected character %q at start of line", body[0]) + } + subhunks = append(subhunks, sh) + } + if hunk.OrigNoNewlineAt > 0 { + // The Hunk represents a missing newline at the end of an "orig" line with a + // OrigNoNewlineAt index. We represent it here as an actual missing newline. + var lastSubhunk *subhunk = &subhunks[len(subhunks)-1] + s := len(lastSubhunk.orig) + if s == 0 { + return nil, errors.New("inconsistent OrigNoNewlineAt in input") + } + var cut bool + lastSubhunk.orig[s-1], cut = bytes.CutSuffix(lastSubhunk.orig[s-1], []byte("\n")) + if !cut { + return nil, errors.New("missing newline in input") + } + } + return subhunks, nil +} diff --git a/diff/reverse_test.go b/diff/reverse_test.go new file mode 100644 index 0000000..313ac6a --- /dev/null +++ b/diff/reverse_test.go @@ -0,0 +1,146 @@ +package diff + +import ( + "bytes" + "os" + "path/filepath" + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestReverseHunks(t *testing.T) { + tests := []struct { + inputFile string + wantFile string + }{ + { + inputFile: "sample_hunks.diff", + wantFile: "sample_hunks.reversed", + }, + { + inputFile: "no_newline_new.diff", + wantFile: "no_newline_new.reversed", + }, + { + inputFile: "no_newline_orig.diff", + wantFile: "no_newline_orig.reversed", + }, + { + inputFile: "no_newline_both.diff", + wantFile: "no_newline_both.reversed", + }, + } + for _, test := range tests { + inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile)) + if err != nil { + t.Fatal(err) + } + wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile)) + if err != nil { + t.Fatal(err) + } + input, err := ParseHunks(inputData) + if err != nil { + t.Fatal(err) + } + + var reversed []*Hunk + for _, in := range input { + out, err := reverseHunk(in) + if err != nil { + // This should only fail if the Hunk data structure is inconsistent + t.Errorf("%s: Unexpected reverseHunk() error: %s", test.inputFile, err) + } + reversed = append(reversed, out) + } + gotData, err := PrintHunks(reversed) + if err != nil { + t.Errorf("%s: PrintHunks of reversed data: %s", test.inputFile, err) + } + if !bytes.Equal(wantData, gotData) { + t.Errorf("%s: Reversed hunk does not match expected.\nWant vs got:\n%s", + test.inputFile, cmp.Diff(wantData, gotData)) + } + } +} + +func TestReverseFileDiff(t *testing.T) { + tests := []struct { + inputFile string + wantFile string + }{ + { + inputFile: "sample_file.diff", + wantFile: "sample_file.reversed", + }, + } + for _, test := range tests { + inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile)) + if err != nil { + t.Fatal(err) + } + wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile)) + if err != nil { + t.Fatal(err) + } + input, err := ParseFileDiff(inputData) + if err != nil { + t.Fatal(err) + } + reversed, err := ReverseFileDiff(input) + if err != nil { + t.Errorf("%s: ReverseFileDiff: %s", test.inputFile, err) + } + gotData, err := PrintFileDiff(reversed) + if err != nil { + t.Errorf("%s: PrintFileDiff of reversed data: %s", test.inputFile, err) + } + if !bytes.Equal(wantData, gotData) { + t.Errorf("%s: Reversed diff does not match expected.\nWant vs got:\n%s", + test.inputFile, cmp.Diff(wantData, gotData)) + } + } +} + +func TestReverseMultiFileDiff(t *testing.T) { + tests := []struct { + inputFile string + wantFile string + }{ + { + inputFile: "sample_file.diff", + wantFile: "sample_file.reversed", + }, + { + inputFile: "sample_multi_file.diff", + wantFile: "sample_multi_file.reversed", + }, + } + for _, test := range tests { + inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile)) + if err != nil { + t.Fatal(err) + } + wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile)) + if err != nil { + t.Fatal(err) + } + input, err := ParseMultiFileDiff(inputData) + if err != nil { + t.Fatal(err) + } + reversed, err := ReverseMultiFileDiff(input) + if err != nil { + t.Errorf("%s: ReverseMultiFileDiff: %s", test.inputFile, err) + } + gotData, err := PrintMultiFileDiff(reversed) + if err != nil { + t.Errorf("%s: PrintMultiFileDiff of reversed data: %s", test.inputFile, err) + } + if !bytes.Equal(wantData, gotData) { + t.Errorf("%s: Reversed diff does not match expected.\nWant vs got:\n%s", + test.inputFile, cmp.Diff(wantData, gotData)) + } + } +} diff --git a/diff/testdata/no_newline_both.reversed b/diff/testdata/no_newline_both.reversed new file mode 100644 index 0000000..52ab7bc --- /dev/null +++ b/diff/testdata/no_newline_both.reversed @@ -0,0 +1,5 @@ +@@ -1,1 +1,1 @@ +-b +\ No newline at end of file ++a +\ No newline at end of file diff --git a/diff/testdata/no_newline_new.reversed b/diff/testdata/no_newline_new.reversed new file mode 100644 index 0000000..443aa8a --- /dev/null +++ b/diff/testdata/no_newline_new.reversed @@ -0,0 +1,6 @@ +@@ -1,2 +1,3 @@ + a +-a +\ No newline at end of file ++a ++a diff --git a/diff/testdata/no_newline_orig.reversed b/diff/testdata/no_newline_orig.reversed new file mode 100644 index 0000000..df02b0c --- /dev/null +++ b/diff/testdata/no_newline_orig.reversed @@ -0,0 +1,4 @@ +@@ -1,1 +1,1 @@ +-b ++a +\ No newline at end of file diff --git a/diff/testdata/sample_file.reversed b/diff/testdata/sample_file.reversed new file mode 100644 index 0000000..90eb49b --- /dev/null +++ b/diff/testdata/sample_file.reversed @@ -0,0 +1,31 @@ +--- newname 2009-10-11 15:12:30.000000000 +0000 ++++ oldname 2009-10-11 15:12:20.000000000 +0000 +@@ -1,9 +1,3 @@ +-This is an important +-notice! It should +-therefore be located at +-the beginning of this +-document! +- + This part of the + document has stayed the + same from version to +@@ -11,10 +5,16 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress anything. ++compress the size of the ++changes. ++ ++This paragraph contains ++text that is outdated. ++It will be deleted in the ++near future. + + It is important to spell +-check this document. On ++check this dokument. On + the other hand, a + misspelled word isn't + the end of the world. diff --git a/diff/testdata/sample_hunks.reversed b/diff/testdata/sample_hunks.reversed new file mode 100644 index 0000000..1ac7d8a --- /dev/null +++ b/diff/testdata/sample_hunks.reversed @@ -0,0 +1,37 @@ +@@ -1,9 +1,3 @@ Section Header +-This is an important +-notice! It should +-therefore be located at +-the beginning of this +-document! +- + This part of the + document has stayed the + same from version to +@@ -11,10 +5,16 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress anything. ++compress the size of the ++changes. ++ ++This paragraph contains ++text that is outdated. ++It will be deleted in the ++near future. + + It is important to spell +-check this document. On ++check this dokument. On + the other hand, a + misspelled word isn't + the end of the world. +@@ -22,7 +22,3 @@ + this paragraph needs to + be changed. Things can + be added after it. +- +-This paragraph contains +-important new additions +-to this document. diff --git a/diff/testdata/sample_multi_file.reversed b/diff/testdata/sample_multi_file.reversed new file mode 100644 index 0000000..283f8e4 --- /dev/null +++ b/diff/testdata/sample_multi_file.reversed @@ -0,0 +1,63 @@ +diff --ruN a/oldname1 b/newname1 +old mode 0777 +new mode 0755 +--- newname1 2009-10-11 15:12:30.000000000 +0000 ++++ oldname1 2009-10-11 15:12:20.000000000 +0000 +@@ -1,9 +1,3 @@ +-This is an important +-notice! It should +-therefore be located at +-the beginning of this +-document! +- + This part of the + document has stayed the + same from version to +@@ -11,10 +5,16 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress anything. ++compress the size of the ++changes. ++ ++This paragraph contains ++text that is outdated. ++It will be deleted in the ++near future. + + It is important to spell +-check this document. On ++check this dokument. On +diff --ruN a/oldname2 b/newname2 +--- newname2 2009-10-11 15:12:30.000000000 +0000 ++++ oldname2 2009-10-11 15:12:20.000000000 +0000 +@@ -1,9 +1,3 @@ +-This is an important +-notice! It should +-therefore be located at +-the beginning of this +-document! +- + This part of the + document has stayed the + same from version to +@@ -11,10 +5,16 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress anything. ++compress the size of the ++changes. ++ ++This paragraph contains ++text that is outdated. ++It will be deleted in the ++near future. + + It is important to spell +-check this document. On ++check this dokument. On + the other hand, a + misspelled word isn't + the end of the world.