From d75d85322b0d5ca793229436110130ad69f2a9f0 Mon Sep 17 00:00:00 2001 From: sofiia-tesliuk Date: Mon, 10 Aug 2020 14:58:37 +0300 Subject: [PATCH 1/3] add handling added/deleted files in diff; --- diff/diff.go | 4 +- diff/diff_test.go | 56 +++++++++++++++++++ diff/parse.go | 38 ++++++++++++- diff/print.go | 11 ++++ .../sample_contains_added_deleted_files.diff | 11 ++++ ...ple_contains_only_added_deleted_files.diff | 3 + 6 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 diff/testdata/sample_contains_added_deleted_files.diff create mode 100644 diff/testdata/sample_contains_only_added_deleted_files.diff diff --git a/diff/diff.go b/diff/diff.go index 646602a..334ec78 100644 --- a/diff/diff.go +++ b/diff/diff.go @@ -54,10 +54,12 @@ func (h *Hunk) Stat() Stat { } var ( - hunkPrefix = []byte("@@ ") + hunkPrefix = []byte("@@ ") + onlyInMessagePrefix = []byte("Only in ") ) const hunkHeader = "@@ -%d,%d +%d,%d @@" +const onlyInMessage = "Only in %s: %s\n" // diffTimeParseLayout is the layout used to parse the time in unified diff file // header timestamps. diff --git a/diff/diff_test.go b/diff/diff_test.go index 6ce0571..b6ec723 100644 --- a/diff/diff_test.go +++ b/diff/diff_test.go @@ -496,6 +496,60 @@ func TestParseMultiFileDiffHeaders(t *testing.T) { }, }, }, + { + filename: "sample_contains_added_deleted_files.diff", + wantDiffs: []*FileDiff{ + { + OrigName: "source_a/file_1.txt", + OrigTime: nil, + NewName: "source_b/file_1.txt", + NewTime: nil, + Extended: []string{ + "diff -u source_a/file_1.txt source_b/file_1.txt", + }, + }, + { + OrigName: "source_a/file_2.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + { + OrigName: "source_b/file_3.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + }, + }, + { + filename: "sample_contains_only_added_deleted_files.diff", + wantDiffs: []*FileDiff{ + { + OrigName: "source_a/file_1.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + { + OrigName: "source_a/file_2.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + { + OrigName: "source_b/file_3.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + }, + }, } for _, test := range tests { diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename)) @@ -574,6 +628,8 @@ func TestParseMultiFileDiffAndPrintMultiFileDiff(t *testing.T) { {filename: "long_line_multi.diff", wantFileDiffs: 3}, {filename: "empty.diff", wantFileDiffs: 0}, {filename: "empty_multi.diff", wantFileDiffs: 2}, + {filename: "sample_contains_added_deleted_files.diff", wantFileDiffs: 3}, + {filename: "sample_contains_only_added_deleted_files.diff", wantFileDiffs: 3}, } for _, test := range tests { diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename)) diff --git a/diff/parse.go b/diff/parse.go index 08cba66..2dba97d 100644 --- a/diff/parse.go +++ b/diff/parse.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "path/filepath" "strconv" "strings" "time" @@ -72,6 +73,12 @@ func (r *MultiFileDiffReader) ReadFile() (*FileDiff, error) { } } + // FileDiff is added/deleted file + // No further collection of hunks needed + if fd.NewName == "" { + return fd, nil + } + // Before reading hunks, check to see if there are any. If there // aren't any, and there's another file after this file in the // diff, then the hunks reader will complain ErrNoHunkHeader. It's @@ -223,8 +230,31 @@ func (r *FileDiffReader) HunksReader() *HunksReader { // ReadFileHeaders reads the unified file diff header (the lines that // start with "---" and "+++" with the orig/new file names and -// timestamps). +// timestamps). Or starts with "Only in " message with dir path filename. func (r *FileDiffReader) ReadFileHeaders() (origName, newName string, origTimestamp, newTimestamp *time.Time, err error) { + if (r.fileHeaderLine != nil) && (bytes.HasPrefix(r.fileHeaderLine, onlyInMessagePrefix)) { + path := bytes.Split(bytes.TrimPrefix( + bytes.TrimSuffix(r.fileHeaderLine, []byte("\n")), + onlyInMessagePrefix), []byte(":")) + if len(path) != 2 { + return "", "", nil, nil, + &ParseError{r.line, r.offset, ErrBadOnlyInMessage} + } + + source, filename := string(bytes.TrimSpace(path[0])), string(bytes.TrimSpace(path[1])) + + unquotedSource, err := strconv.Unquote(source) + if err == nil { + source = unquotedSource + } + unquotedFilename, err := strconv.Unquote(filename) + if err == nil { + filename = unquotedFilename + } + + return filepath.Join(source, filename), "", nil, nil, nil + } + origName, origTimestamp, err = r.readOneFileHeader([]byte("--- ")) if err != nil { return "", "", nil, nil, err @@ -324,7 +354,7 @@ func (r *FileDiffReader) ReadExtendedHeaders() ([]string, error) { return xheaders, OverflowError(line) } } - if bytes.HasPrefix(line, []byte("--- ")) { + if bytes.HasPrefix(line, []byte("--- ")) || (bytes.HasPrefix(line, onlyInMessagePrefix)) { // We've reached the file header. r.fileHeaderLine = line // pass to readOneFileHeader (see fileHeaderLine field doc) return xheaders, nil @@ -403,6 +433,10 @@ var ( // ErrExtendedHeadersEOF is when an EOF was encountered while reading extended file headers, which means that there were no ---/+++ headers encountered before hunks (if any) began. ErrExtendedHeadersEOF = errors.New("expected file header while reading extended headers, got EOF") + + // ErrBadOnlyInMessage is when a file have a malformed `only in` message + // Should be in format `Only in {source}: {filename}` + ErrBadOnlyInMessage = errors.New("bad `only in` message") ) // ParseHunks parses hunks from a unified diff. The diff must consist diff --git a/diff/print.go b/diff/print.go index d440cb9..36106d1 100644 --- a/diff/print.go +++ b/diff/print.go @@ -4,6 +4,7 @@ import ( "bytes" "fmt" "io" + "path/filepath" "time" "sourcegraph.com/sqs/pbtypes" @@ -36,6 +37,16 @@ func PrintFileDiff(d *FileDiff) ([]byte, error) { } } + // FileDiff is added/deleted file + // No further hunks printing needed + if d.NewName == "" { + _, err := fmt.Fprintf(&buf, onlyInMessage, filepath.Dir(d.OrigName), filepath.Base(d.OrigName)) + if err != nil { + return nil, err + } + return buf.Bytes(), nil + } + if d.Hunks == nil { return buf.Bytes(), nil } diff --git a/diff/testdata/sample_contains_added_deleted_files.diff b/diff/testdata/sample_contains_added_deleted_files.diff new file mode 100644 index 0000000..cb777ac --- /dev/null +++ b/diff/testdata/sample_contains_added_deleted_files.diff @@ -0,0 +1,11 @@ +diff -u source_a/file_1.txt source_b/file_1.txt +--- source_a/file_1.txt ++++ source_b/file_1.txt +@@ -2,3 +3,4 @@ + To be, or not to be, that is the question: +-Whether 'tis nobler in the mind to suffer ++The slings and arrows of outrageous fortune, ++Or to take arms against a sea of troubles + And by opposing end them. To die—to sleep, +Only in source_a: file_2.txt +Only in source_b: file_3.txt diff --git a/diff/testdata/sample_contains_only_added_deleted_files.diff b/diff/testdata/sample_contains_only_added_deleted_files.diff new file mode 100644 index 0000000..e94a44f --- /dev/null +++ b/diff/testdata/sample_contains_only_added_deleted_files.diff @@ -0,0 +1,3 @@ +Only in source_a: file_1.txt +Only in source_a: file_2.txt +Only in source_b: file_3.txt From 4546b812a0e17ab32e28cd0768fecb50bc4b58b8 Mon Sep 17 00:00:00 2001 From: sofiia-tesliuk Date: Mon, 10 Aug 2020 17:45:09 +0300 Subject: [PATCH 2/3] corrected handling onlyIn message; --- diff/diff_test.go | 33 +++++++++++++ diff/parse.go | 47 ++++++++++--------- ...sample_onlyin_line_isnt_a_file_header.diff | 13 +++++ 3 files changed, 71 insertions(+), 22 deletions(-) create mode 100644 diff/testdata/sample_onlyin_line_isnt_a_file_header.diff diff --git a/diff/diff_test.go b/diff/diff_test.go index b6ec723..3a5540c 100644 --- a/diff/diff_test.go +++ b/diff/diff_test.go @@ -550,6 +550,38 @@ func TestParseMultiFileDiffHeaders(t *testing.T) { }, }, }, + { + filename: "sample_onlyin_line_isnt_a_file_header.diff", + wantDiffs: []*FileDiff{ + { + OrigName: "source_a/file_1.txt", + OrigTime: nil, + NewName: "source_b/file_1.txt", + NewTime: nil, + Extended: []string{ + "diff -u source_a/file_1.txt source_b/file_1.txt", + }, + }, + { + OrigName: "source_a/file_2.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: []string{ + "Only in universe!", + }, + }, + { + OrigName: "source_b/file_3.txt", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: []string{ + "Only in source_b: file_3.txt some unrelated stuff here.", + }, + }, + }, + }, } for _, test := range tests { diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename)) @@ -630,6 +662,7 @@ func TestParseMultiFileDiffAndPrintMultiFileDiff(t *testing.T) { {filename: "empty_multi.diff", wantFileDiffs: 2}, {filename: "sample_contains_added_deleted_files.diff", wantFileDiffs: 3}, {filename: "sample_contains_only_added_deleted_files.diff", wantFileDiffs: 3}, + {filename: "sample_onlyin_line_isnt_a_file_header.diff", wantFileDiffs: 3}, } for _, test := range tests { diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename)) diff --git a/diff/parse.go b/diff/parse.go index 2dba97d..722bb6b 100644 --- a/diff/parse.go +++ b/diff/parse.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "path/filepath" + "regexp" "strconv" "strings" "time" @@ -232,27 +233,11 @@ func (r *FileDiffReader) HunksReader() *HunksReader { // start with "---" and "+++" with the orig/new file names and // timestamps). Or starts with "Only in " message with dir path filename. func (r *FileDiffReader) ReadFileHeaders() (origName, newName string, origTimestamp, newTimestamp *time.Time, err error) { - if (r.fileHeaderLine != nil) && (bytes.HasPrefix(r.fileHeaderLine, onlyInMessagePrefix)) { - path := bytes.Split(bytes.TrimPrefix( - bytes.TrimSuffix(r.fileHeaderLine, []byte("\n")), - onlyInMessagePrefix), []byte(":")) - if len(path) != 2 { - return "", "", nil, nil, - &ParseError{r.line, r.offset, ErrBadOnlyInMessage} + if r.fileHeaderLine != nil { + if isOnlyMessage, source, filename := parseOnlyInMessage(r.fileHeaderLine); isOnlyMessage { + return filepath.Join(string(source), string(filename)), + "", nil, nil, nil } - - source, filename := string(bytes.TrimSpace(path[0])), string(bytes.TrimSpace(path[1])) - - unquotedSource, err := strconv.Unquote(source) - if err == nil { - source = unquotedSource - } - unquotedFilename, err := strconv.Unquote(filename) - if err == nil { - filename = unquotedFilename - } - - return filepath.Join(source, filename), "", nil, nil, nil } origName, origTimestamp, err = r.readOneFileHeader([]byte("--- ")) @@ -354,12 +339,18 @@ func (r *FileDiffReader) ReadExtendedHeaders() ([]string, error) { return xheaders, OverflowError(line) } } - if bytes.HasPrefix(line, []byte("--- ")) || (bytes.HasPrefix(line, onlyInMessagePrefix)) { + if bytes.HasPrefix(line, []byte("--- ")) { // We've reached the file header. r.fileHeaderLine = line // pass to readOneFileHeader (see fileHeaderLine field doc) return xheaders, nil } + // Reached message that file is added/deleted + if isOnlyInMessage, _, _ := parseOnlyInMessage(line); isOnlyInMessage { + r.fileHeaderLine = line // pass to readOneFileHeader (see fileHeaderLine field doc) + return xheaders, nil + } + r.line++ r.offset += int64(len(line)) xheaders = append(xheaders, string(line)) @@ -436,7 +427,7 @@ var ( // ErrBadOnlyInMessage is when a file have a malformed `only in` message // Should be in format `Only in {source}: {filename}` - ErrBadOnlyInMessage = errors.New("bad `only in` message") + ErrBadOnlyInMessage = errors.New("bad 'only in' message") ) // ParseHunks parses hunks from a unified diff. The diff must consist @@ -646,6 +637,18 @@ func (r *HunksReader) ReadAllHunks() ([]*Hunk, error) { } } +// parseOnlyInMessage checks if line is a "Only in {source}: {filename}" and returns source and filename +func parseOnlyInMessage(line []byte) (bool, []byte, []byte) { + re := regexp.MustCompile("Only in ([\x21-\x7E]+): ([\x21-\x7E]+)") + if re.Match(line) { + slices := re.FindSubmatch(line) + if fmt.Sprintf("Only in %s: %s", slices[1], slices[2]) == string(line) { + return true, slices[1], slices[2] + } + } + return false, nil, nil +} + // A ParseError is a description of a unified diff syntax error. type ParseError struct { Line int // Line where the error occurred diff --git a/diff/testdata/sample_onlyin_line_isnt_a_file_header.diff b/diff/testdata/sample_onlyin_line_isnt_a_file_header.diff new file mode 100644 index 0000000..8de8782 --- /dev/null +++ b/diff/testdata/sample_onlyin_line_isnt_a_file_header.diff @@ -0,0 +1,13 @@ +diff -u source_a/file_1.txt source_b/file_1.txt +--- source_a/file_1.txt ++++ source_b/file_1.txt +@@ -2,3 +3,4 @@ + To be, or not to be, that is the question: +-Whether 'tis nobler in the mind to suffer ++The slings and arrows of outrageous fortune, ++Or to take arms against a sea of troubles + And by opposing end them. To die—to sleep, +Only in universe! +Only in source_a: file_2.txt +Only in source_b: file_3.txt some unrelated stuff here. +Only in source_b: file_3.txt From 29fa046e33bb9ccb5992240fe27380b709e750d0 Mon Sep 17 00:00:00 2001 From: sofiia-tesliuk Date: Thu, 13 Aug 2020 02:45:31 +0300 Subject: [PATCH 3/3] check for "Only in" message is non-regex now; --- diff/diff_test.go | 40 +++++++++++++++++-- diff/parse.go | 19 ++++----- .../sample_onlyin_complex_filenames.diff | 3 ++ 3 files changed, 49 insertions(+), 13 deletions(-) create mode 100644 diff/testdata/sample_onlyin_complex_filenames.diff diff --git a/diff/diff_test.go b/diff/diff_test.go index 3a5540c..07c7303 100644 --- a/diff/diff_test.go +++ b/diff/diff_test.go @@ -571,14 +571,45 @@ func TestParseMultiFileDiffHeaders(t *testing.T) { "Only in universe!", }, }, + { + OrigName: "source_b/file_3.txt some unrelated stuff here.", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, { OrigName: "source_b/file_3.txt", OrigTime: nil, NewName: "", NewTime: nil, - Extended: []string{ - "Only in source_b: file_3.txt some unrelated stuff here.", - }, + Extended: nil, + }, + }, + }, + { + filename: "sample_onlyin_complex_filenames.diff", + wantDiffs: []*FileDiff{ + { + OrigName: "internal/trace/foo bar/bam", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + { + OrigName: "internal/trace/foo bar/bam: bar", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, + }, + { + OrigName: "internal/trace/hello/world: bazz", + OrigTime: nil, + NewName: "", + NewTime: nil, + Extended: nil, }, }, }, @@ -662,7 +693,8 @@ func TestParseMultiFileDiffAndPrintMultiFileDiff(t *testing.T) { {filename: "empty_multi.diff", wantFileDiffs: 2}, {filename: "sample_contains_added_deleted_files.diff", wantFileDiffs: 3}, {filename: "sample_contains_only_added_deleted_files.diff", wantFileDiffs: 3}, - {filename: "sample_onlyin_line_isnt_a_file_header.diff", wantFileDiffs: 3}, + {filename: "sample_onlyin_line_isnt_a_file_header.diff", wantFileDiffs: 4}, + {filename: "sample_onlyin_complex_filenames.diff", wantFileDiffs: 3}, } for _, test := range tests { diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename)) diff --git a/diff/parse.go b/diff/parse.go index 722bb6b..8a36784 100644 --- a/diff/parse.go +++ b/diff/parse.go @@ -7,7 +7,6 @@ import ( "fmt" "io" "path/filepath" - "regexp" "strconv" "strings" "time" @@ -231,7 +230,8 @@ func (r *FileDiffReader) HunksReader() *HunksReader { // ReadFileHeaders reads the unified file diff header (the lines that // start with "---" and "+++" with the orig/new file names and -// timestamps). Or starts with "Only in " message with dir path filename. +// timestamps). Or which starts with "Only in " with dir path and filename. +// "Only in" message is supported in POSIX locale: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/diff.html#tag_20_34_10 func (r *FileDiffReader) ReadFileHeaders() (origName, newName string, origTimestamp, newTimestamp *time.Time, err error) { if r.fileHeaderLine != nil { if isOnlyMessage, source, filename := parseOnlyInMessage(r.fileHeaderLine); isOnlyMessage { @@ -639,14 +639,15 @@ func (r *HunksReader) ReadAllHunks() ([]*Hunk, error) { // parseOnlyInMessage checks if line is a "Only in {source}: {filename}" and returns source and filename func parseOnlyInMessage(line []byte) (bool, []byte, []byte) { - re := regexp.MustCompile("Only in ([\x21-\x7E]+): ([\x21-\x7E]+)") - if re.Match(line) { - slices := re.FindSubmatch(line) - if fmt.Sprintf("Only in %s: %s", slices[1], slices[2]) == string(line) { - return true, slices[1], slices[2] - } + if !bytes.HasPrefix(line, onlyInMessagePrefix) { + return false, nil, nil + } + line = line[len(onlyInMessagePrefix):] + idx := bytes.Index(line, []byte(": ")) + if idx < 0 { + return false, nil, nil } - return false, nil, nil + return true, line[:idx], line[idx+2:] } // A ParseError is a description of a unified diff syntax error. diff --git a/diff/testdata/sample_onlyin_complex_filenames.diff b/diff/testdata/sample_onlyin_complex_filenames.diff new file mode 100644 index 0000000..bf7b58f --- /dev/null +++ b/diff/testdata/sample_onlyin_complex_filenames.diff @@ -0,0 +1,3 @@ +Only in internal/trace/foo bar: bam +Only in internal/trace/foo bar: bam: bar +Only in internal/trace/hello: world: bazz