diff --git a/diff/diff.go b/diff/diff.go index 646602a..334ec78 100644 --- a/diff/diff.go +++ b/diff/diff.go @@ -54,10 +54,12 @@ func (h *Hunk) Stat() Stat { } var ( - hunkPrefix = []byte("@@ ") + hunkPrefix = []byte("@@ ") + onlyInMessagePrefix = []byte("Only in ") ) const hunkHeader = "@@ -%d,%d +%d,%d @@" +const onlyInMessage = "Only in %s: %s\n" // diffTimeParseLayout is the layout used to parse the time in unified diff file // header timestamps. diff --git a/diff/diff_test.go b/diff/diff_test.go index 6ce0571..07c7303 100644 --- a/diff/diff_test.go +++ b/diff/diff_test.go @@ -496,6 +496,123 @@ func TestParseMultiFileDiffHeaders(t *testing.T) { }, }, }, + { + filename: "sample_contains_added_deleted_files.diff", + wantDiffs: []*FileDiff{ + { + OrigName: "source_a/file_1.txt", + OrigTime: nil, + NewName: "source_b/file_1.txt", + NewTime: nil, + Extended: []string{ + "diff -u source_a/file_1.txt source_b/file_1.txt", + }, + }, + { + OrigName: "source_a/file_2.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + { + OrigName: "source_b/file_3.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + }, + }, + { + filename: "sample_contains_only_added_deleted_files.diff", + wantDiffs: []*FileDiff{ + { + OrigName: "source_a/file_1.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + { + OrigName: "source_a/file_2.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + { + OrigName: "source_b/file_3.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + }, + }, + { + filename: "sample_onlyin_line_isnt_a_file_header.diff", + wantDiffs: []*FileDiff{ + { + OrigName: "source_a/file_1.txt", + OrigTime: nil, + NewName: "source_b/file_1.txt", + NewTime: nil, + Extended: []string{ + "diff -u source_a/file_1.txt source_b/file_1.txt", + }, + }, + { + OrigName: "source_a/file_2.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: []string{ + "Only in universe!", + }, + }, + { + OrigName: "source_b/file_3.txt some unrelated stuff here.", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + { + OrigName: "source_b/file_3.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + }, + }, + { + filename: "sample_onlyin_complex_filenames.diff", + wantDiffs: []*FileDiff{ + { + OrigName: "internal/trace/foo bar/bam", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + { + OrigName: "internal/trace/foo bar/bam: bar", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + { + OrigName: "internal/trace/hello/world: bazz", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + }, + }, } for _, test := range tests { diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename)) @@ -574,6 +691,10 @@ func TestParseMultiFileDiffAndPrintMultiFileDiff(t *testing.T) { {filename: "long_line_multi.diff", wantFileDiffs: 3}, {filename: "empty.diff", wantFileDiffs: 0}, {filename: "empty_multi.diff", wantFileDiffs: 2}, + {filename: "sample_contains_added_deleted_files.diff", wantFileDiffs: 3}, + {filename: "sample_contains_only_added_deleted_files.diff", wantFileDiffs: 3}, + {filename: "sample_onlyin_line_isnt_a_file_header.diff", wantFileDiffs: 4}, + {filename: "sample_onlyin_complex_filenames.diff", wantFileDiffs: 3}, } for _, test := range tests { diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename)) diff --git a/diff/parse.go b/diff/parse.go index 08cba66..8a36784 100644 --- a/diff/parse.go +++ b/diff/parse.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "path/filepath" "strconv" "strings" "time" @@ -72,6 +73,12 @@ func (r *MultiFileDiffReader) ReadFile() (*FileDiff, error) { } } + // FileDiff is added/deleted file + // No further collection of hunks needed + if fd.NewName == "" { + return fd, nil + } + // Before reading hunks, check to see if there are any. If there // aren't any, and there's another file after this file in the // diff, then the hunks reader will complain ErrNoHunkHeader. It's @@ -223,8 +230,16 @@ func (r *FileDiffReader) HunksReader() *HunksReader { // ReadFileHeaders reads the unified file diff header (the lines that // start with "---" and "+++" with the orig/new file names and -// timestamps). +// timestamps). Or which starts with "Only in " with dir path and filename. +// "Only in" message is supported in POSIX locale: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/diff.html#tag_20_34_10 func (r *FileDiffReader) ReadFileHeaders() (origName, newName string, origTimestamp, newTimestamp *time.Time, err error) { + if r.fileHeaderLine != nil { + if isOnlyMessage, source, filename := parseOnlyInMessage(r.fileHeaderLine); isOnlyMessage { + return filepath.Join(string(source), string(filename)), + "", nil, nil, nil + } + } + origName, origTimestamp, err = r.readOneFileHeader([]byte("--- ")) if err != nil { return "", "", nil, nil, err @@ -330,6 +345,12 @@ func (r *FileDiffReader) ReadExtendedHeaders() ([]string, error) { return xheaders, nil } + // Reached message that file is added/deleted + if isOnlyInMessage, _, _ := parseOnlyInMessage(line); isOnlyInMessage { + r.fileHeaderLine = line // pass to readOneFileHeader (see fileHeaderLine field doc) + return xheaders, nil + } + r.line++ r.offset += int64(len(line)) xheaders = append(xheaders, string(line)) @@ -403,6 +424,10 @@ var ( // ErrExtendedHeadersEOF is when an EOF was encountered while reading extended file headers, which means that there were no ---/+++ headers encountered before hunks (if any) began. ErrExtendedHeadersEOF = errors.New("expected file header while reading extended headers, got EOF") + + // ErrBadOnlyInMessage is when a file have a malformed `only in` message + // Should be in format `Only in {source}: {filename}` + ErrBadOnlyInMessage = errors.New("bad 'only in' message") ) // ParseHunks parses hunks from a unified diff. The diff must consist @@ -612,6 +637,19 @@ func (r *HunksReader) ReadAllHunks() ([]*Hunk, error) { } } +// parseOnlyInMessage checks if line is a "Only in {source}: {filename}" and returns source and filename +func parseOnlyInMessage(line []byte) (bool, []byte, []byte) { + if !bytes.HasPrefix(line, onlyInMessagePrefix) { + return false, nil, nil + } + line = line[len(onlyInMessagePrefix):] + idx := bytes.Index(line, []byte(": ")) + if idx < 0 { + return false, nil, nil + } + return true, line[:idx], line[idx+2:] +} + // A ParseError is a description of a unified diff syntax error. type ParseError struct { Line int // Line where the error occurred diff --git a/diff/print.go b/diff/print.go index d440cb9..36106d1 100644 --- a/diff/print.go +++ b/diff/print.go @@ -4,6 +4,7 @@ import ( "bytes" "fmt" "io" + "path/filepath" "time" "sourcegraph.com/sqs/pbtypes" @@ -36,6 +37,16 @@ func PrintFileDiff(d *FileDiff) ([]byte, error) { } } + // FileDiff is added/deleted file + // No further hunks printing needed + if d.NewName == "" { + _, err := fmt.Fprintf(&buf, onlyInMessage, filepath.Dir(d.OrigName), filepath.Base(d.OrigName)) + if err != nil { + return nil, err + } + return buf.Bytes(), nil + } + if d.Hunks == nil { return buf.Bytes(), nil } diff --git a/diff/testdata/sample_contains_added_deleted_files.diff b/diff/testdata/sample_contains_added_deleted_files.diff new file mode 100644 index 0000000..cb777ac --- /dev/null +++ b/diff/testdata/sample_contains_added_deleted_files.diff @@ -0,0 +1,11 @@ +diff -u source_a/file_1.txt source_b/file_1.txt +--- source_a/file_1.txt ++++ source_b/file_1.txt +@@ -2,3 +3,4 @@ + To be, or not to be, that is the question: +-Whether 'tis nobler in the mind to suffer ++The slings and arrows of outrageous fortune, ++Or to take arms against a sea of troubles + And by opposing end them. To die—to sleep, +Only in source_a: file_2.txt +Only in source_b: file_3.txt diff --git a/diff/testdata/sample_contains_only_added_deleted_files.diff b/diff/testdata/sample_contains_only_added_deleted_files.diff new file mode 100644 index 0000000..e94a44f --- /dev/null +++ b/diff/testdata/sample_contains_only_added_deleted_files.diff @@ -0,0 +1,3 @@ +Only in source_a: file_1.txt +Only in source_a: file_2.txt +Only in source_b: file_3.txt diff --git a/diff/testdata/sample_onlyin_complex_filenames.diff b/diff/testdata/sample_onlyin_complex_filenames.diff new file mode 100644 index 0000000..bf7b58f --- /dev/null +++ b/diff/testdata/sample_onlyin_complex_filenames.diff @@ -0,0 +1,3 @@ +Only in internal/trace/foo bar: bam +Only in internal/trace/foo bar: bam: bar +Only in internal/trace/hello: world: bazz diff --git a/diff/testdata/sample_onlyin_line_isnt_a_file_header.diff b/diff/testdata/sample_onlyin_line_isnt_a_file_header.diff new file mode 100644 index 0000000..8de8782 --- /dev/null +++ b/diff/testdata/sample_onlyin_line_isnt_a_file_header.diff @@ -0,0 +1,13 @@ +diff -u source_a/file_1.txt source_b/file_1.txt +--- source_a/file_1.txt ++++ source_b/file_1.txt +@@ -2,3 +3,4 @@ + To be, or not to be, that is the question: +-Whether 'tis nobler in the mind to suffer ++The slings and arrows of outrageous fortune, ++Or to take arms against a sea of troubles + And by opposing end them. To die—to sleep, +Only in universe! +Only in source_a: file_2.txt +Only in source_b: file_3.txt some unrelated stuff here. +Only in source_b: file_3.txt