Skip to content

Commit 5bc20c4

Browse files
authored
Merge pull request #56 from sourcegraph/mrnugget/fix-non-extended-file-header-parsing-2
Add a lineReader to provide two-line lookahead
2 parents 842d2c1 + deb8e40 commit 5bc20c4

6 files changed

+271
-25
lines changed

diff/diff_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ func TestParseHunksAndPrintHunks(t *testing.T) {
7474
{filename: "oneline_hunk.diff"},
7575
{filename: "empty.diff"},
7676
{filename: "sample_hunk_lines_start_with_minuses.diff"},
77+
{filename: "sample_hunk_lines_start_with_minuses_pluses.diff"},
7778
}
7879
for _, test := range tests {
7980
diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename))
@@ -736,6 +737,8 @@ func TestParseMultiFileDiffAndPrintMultiFileDiff(t *testing.T) {
736737
{filename: "sample_contains_only_added_deleted_files.diff", wantFileDiffs: 3},
737738
{filename: "sample_onlyin_line_isnt_a_file_header.diff", wantFileDiffs: 4},
738739
{filename: "sample_onlyin_complex_filenames.diff", wantFileDiffs: 3},
740+
{filename: "sample_multi_file_minuses_pluses.diff", wantFileDiffs: 2},
741+
{filename: "sample_multi_file_without_extended.diff", wantFileDiffs: 2},
739742
}
740743
for _, test := range tests {
741744
diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename))

diff/parse.go

+15-25
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,14 @@ func ParseMultiFileDiff(diff []byte) ([]*FileDiff, error) {
2323
// NewMultiFileDiffReader returns a new MultiFileDiffReader that reads
2424
// a multi-file unified diff from r.
2525
func NewMultiFileDiffReader(r io.Reader) *MultiFileDiffReader {
26-
return &MultiFileDiffReader{reader: bufio.NewReader(r)}
26+
return &MultiFileDiffReader{reader: newLineReader(r)}
2727
}
2828

2929
// MultiFileDiffReader reads a multi-file unified diff.
3030
type MultiFileDiffReader struct {
3131
line int
3232
offset int64
33-
reader *bufio.Reader
33+
reader *lineReader
3434

3535
// TODO(sqs): line and offset tracking in multi-file diffs is broken; add tests and fix
3636

@@ -85,7 +85,7 @@ func (r *MultiFileDiffReader) ReadFile() (*FileDiff, error) {
8585
// caused by the lack of any hunks, or a malformatted hunk, so we
8686
// need to perform the check here.
8787
hr := fr.HunksReader()
88-
line, err := readLine(r.reader)
88+
line, err := r.reader.readLine()
8989
if err != nil && err != io.EOF {
9090
return fd, err
9191
}
@@ -141,14 +141,14 @@ func ParseFileDiff(diff []byte) (*FileDiff, error) {
141141
// NewFileDiffReader returns a new FileDiffReader that reads a file
142142
// unified diff.
143143
func NewFileDiffReader(r io.Reader) *FileDiffReader {
144-
return &FileDiffReader{reader: bufio.NewReader(r)}
144+
return &FileDiffReader{reader: &lineReader{reader: bufio.NewReader(r)}}
145145
}
146146

147147
// FileDiffReader reads a unified file diff.
148148
type FileDiffReader struct {
149149
line int
150150
offset int64
151-
reader *bufio.Reader
151+
reader *lineReader
152152

153153
// fileHeaderLine is the first file header line, set by:
154154
//
@@ -266,7 +266,7 @@ func (r *FileDiffReader) readOneFileHeader(prefix []byte) (filename string, time
266266

267267
if r.fileHeaderLine == nil {
268268
var err error
269-
line, err = readLine(r.reader)
269+
line, err = r.reader.readLine()
270270
if err == io.EOF {
271271
return "", nil, &ParseError{r.line, r.offset, ErrNoFileHeader}
272272
} else if err != nil {
@@ -318,7 +318,7 @@ func (r *FileDiffReader) ReadExtendedHeaders() ([]string, error) {
318318
var line []byte
319319
if r.fileHeaderLine == nil {
320320
var err error
321-
line, err = readLine(r.reader)
321+
line, err = r.reader.readLine()
322322
if err == io.EOF {
323323
return xheaders, &ParseError{r.line, r.offset, ErrExtendedHeadersEOF}
324324
} else if err != nil {
@@ -447,15 +447,15 @@ func ParseHunks(diff []byte) ([]*Hunk, error) {
447447
// NewHunksReader returns a new HunksReader that reads unified diff hunks
448448
// from r.
449449
func NewHunksReader(r io.Reader) *HunksReader {
450-
return &HunksReader{reader: bufio.NewReader(r)}
450+
return &HunksReader{reader: &lineReader{reader: bufio.NewReader(r)}}
451451
}
452452

453453
// A HunksReader reads hunks from a unified diff.
454454
type HunksReader struct {
455455
line int
456456
offset int64
457457
hunk *Hunk
458-
reader *bufio.Reader
458+
reader *lineReader
459459

460460
nextHunkHeaderLine []byte
461461
}
@@ -474,7 +474,7 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) {
474474
line = r.nextHunkHeaderLine
475475
r.nextHunkHeaderLine = nil
476476
} else {
477-
line, err = readLine(r.reader)
477+
line, err = r.reader.readLine()
478478
if err != nil {
479479
if err == io.EOF && r.hunk != nil {
480480
return r.hunk, nil
@@ -518,12 +518,15 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) {
518518
// If the line starts with `---` and the next one with `+++` we're
519519
// looking at a non-extended file header and need to abort.
520520
if bytes.HasPrefix(line, []byte("---")) {
521-
ok, err := peekPrefix(r.reader, "+++")
521+
ok, err := r.reader.nextLineStartsWith("+++")
522522
if err != nil {
523523
return r.hunk, err
524524
}
525525
if ok {
526-
return r.hunk, &ParseError{r.line, r.offset, &ErrBadHunkLine{Line: line}}
526+
ok2, _ := r.reader.nextNextLineStartsWith(string(hunkPrefix))
527+
if ok2 {
528+
return r.hunk, &ParseError{r.line, r.offset, &ErrBadHunkLine{Line: line}}
529+
}
527530
}
528531
}
529532

@@ -593,19 +596,6 @@ func linePrefix(c byte) bool {
593596
return false
594597
}
595598

596-
// peekPrefix peeks into the given reader to check whether the next
597-
// bytes match the given prefix.
598-
func peekPrefix(reader *bufio.Reader, prefix string) (bool, error) {
599-
next, err := reader.Peek(len(prefix))
600-
if err != nil {
601-
if err == io.EOF {
602-
return false, nil
603-
}
604-
return false, err
605-
}
606-
return bytes.HasPrefix(next, []byte(prefix)), nil
607-
}
608-
609599
// normalizeHeader takes a header of the form:
610600
// "@@ -linestart[,chunksize] +linestart[,chunksize] @@ section"
611601
// and returns two strings, with the first in the form:

diff/reader_util.go

+83
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,92 @@ package diff
22

33
import (
44
"bufio"
5+
"bytes"
6+
"errors"
57
"io"
68
)
79

10+
var ErrLineReaderUninitialized = errors.New("line reader not initialized")
11+
12+
func newLineReader(r io.Reader) *lineReader {
13+
return &lineReader{reader: bufio.NewReader(r)}
14+
}
15+
16+
// lineReader is a wrapper around a bufio.Reader that caches the next line to
17+
// provide lookahead functionality for the next two lines.
18+
type lineReader struct {
19+
reader *bufio.Reader
20+
21+
cachedNextLine []byte
22+
cachedNextLineErr error
23+
}
24+
25+
// readLine returns the next unconsumed line and advances the internal cache of
26+
// the lineReader.
27+
func (l *lineReader) readLine() ([]byte, error) {
28+
if l.cachedNextLine == nil && l.cachedNextLineErr == nil {
29+
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
30+
}
31+
32+
if l.cachedNextLineErr != nil {
33+
return nil, l.cachedNextLineErr
34+
}
35+
36+
next := l.cachedNextLine
37+
38+
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
39+
40+
return next, nil
41+
}
42+
43+
// nextLineStartsWith looks at the line that would be returned by the next call
44+
// to readLine to check whether it has the given prefix.
45+
//
46+
// io.EOF and bufio.ErrBufferFull errors are ignored so that the function can
47+
// be used when at the end of the file.
48+
func (l *lineReader) nextLineStartsWith(prefix string) (bool, error) {
49+
if l.cachedNextLine == nil && l.cachedNextLineErr == nil {
50+
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
51+
}
52+
53+
return l.lineHasPrefix(l.cachedNextLine, prefix, l.cachedNextLineErr)
54+
}
55+
56+
// nextNextLineStartsWith checks the prefix of the line *after* the line that
57+
// would be returned by the next readLine.
58+
//
59+
// io.EOF and bufio.ErrBufferFull errors are ignored so that the function can
60+
// be used when at the end of the file.
61+
//
62+
// The lineReader MUST be initialized by calling readLine at least once before
63+
// calling nextLineStartsWith. Otherwise ErrLineReaderUninitialized will be
64+
// returned.
65+
func (l *lineReader) nextNextLineStartsWith(prefix string) (bool, error) {
66+
if l.cachedNextLine == nil && l.cachedNextLineErr == nil {
67+
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
68+
}
69+
70+
next, err := l.reader.Peek(len(prefix))
71+
return l.lineHasPrefix(next, prefix, err)
72+
}
73+
74+
// lineHasPrefix checks whether the given line has the given prefix with
75+
// bytes.HasPrefix.
76+
//
77+
// The readErr should be the error that was returned when the line was read.
78+
// lineHasPrefix checks the error to adjust its return value to, e.g., return
79+
// false and ignore the error when readErr is io.EOF.
80+
func (l *lineReader) lineHasPrefix(line []byte, prefix string, readErr error) (bool, error) {
81+
if readErr != nil {
82+
if readErr == io.EOF || readErr == bufio.ErrBufferFull {
83+
return false, nil
84+
}
85+
return false, readErr
86+
}
87+
88+
return bytes.HasPrefix(line, []byte(prefix)), nil
89+
}
90+
891
// readLine is a helper that mimics the functionality of calling bufio.Scanner.Scan() and
992
// bufio.Scanner.Bytes(), but without the token size limitation. It will read and return
1093
// the next line in the Reader with the trailing newline stripped. It will return an

diff/reader_util_test.go

+141
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,144 @@ index 0000000..3be2928`,
6666
})
6767
}
6868
}
69+
70+
func TestLineReader_ReadLine(t *testing.T) {
71+
input := `diff --git a/test.go b/test.go
72+
new file mode 100644
73+
index 0000000..3be2928
74+
75+
76+
`
77+
78+
in := newLineReader(strings.NewReader(input))
79+
out := []string{}
80+
for i := 0; i < 4; i++ {
81+
l, err := in.readLine()
82+
if err != nil {
83+
t.Fatal(err)
84+
}
85+
out = append(out, string(l))
86+
}
87+
88+
wantOut := strings.Split(input, "\n")[0:4]
89+
if !reflect.DeepEqual(wantOut, out) {
90+
t.Errorf("read lines not equal: want %v, got %v", wantOut, out)
91+
}
92+
93+
_, err := in.readLine()
94+
if err != nil {
95+
t.Fatal(err)
96+
}
97+
if in.cachedNextLineErr != io.EOF {
98+
t.Fatalf("lineReader has wrong cachedNextLineErr: %s", in.cachedNextLineErr)
99+
}
100+
_, err = in.readLine()
101+
if err != io.EOF {
102+
t.Fatalf("readLine did not return io.EOF: %s", err)
103+
}
104+
}
105+
106+
func TestLineReader_NextLine(t *testing.T) {
107+
input := `aaa rest of line
108+
bbbrest of line
109+
ccc rest of line`
110+
111+
in := newLineReader(strings.NewReader(input))
112+
113+
type assertion struct {
114+
prefix string
115+
want bool
116+
}
117+
118+
testsPerReadLine := []struct {
119+
nextLine []assertion
120+
nextNextLine []assertion
121+
wantReadLineErr error
122+
}{
123+
{
124+
nextLine: []assertion{
125+
{prefix: "a", want: true},
126+
{prefix: "aa", want: true},
127+
{prefix: "aaa", want: true},
128+
{prefix: "bbb", want: false},
129+
{prefix: "ccc", want: false},
130+
},
131+
nextNextLine: []assertion{
132+
{prefix: "aaa", want: false},
133+
{prefix: "bbb", want: true},
134+
{prefix: "ccc", want: false},
135+
},
136+
},
137+
{
138+
nextLine: []assertion{
139+
{prefix: "aaa", want: false},
140+
{prefix: "bbb", want: true},
141+
{prefix: "ccc", want: false},
142+
},
143+
nextNextLine: []assertion{
144+
{prefix: "aaa", want: false},
145+
{prefix: "bbb", want: false},
146+
{prefix: "ccc", want: true},
147+
},
148+
},
149+
{
150+
nextLine: []assertion{
151+
{prefix: "aaa", want: false},
152+
{prefix: "bbb", want: false},
153+
{prefix: "ccc", want: true},
154+
{prefix: "ddd", want: false},
155+
},
156+
nextNextLine: []assertion{
157+
{prefix: "aaa", want: false},
158+
{prefix: "bbb", want: false},
159+
{prefix: "ccc", want: false},
160+
{prefix: "ddd", want: false},
161+
},
162+
},
163+
{
164+
nextLine: []assertion{
165+
{prefix: "aaa", want: false},
166+
{prefix: "bbb", want: false},
167+
{prefix: "ccc", want: false},
168+
{prefix: "ddd", want: false},
169+
},
170+
nextNextLine: []assertion{
171+
{prefix: "aaa", want: false},
172+
{prefix: "bbb", want: false},
173+
{prefix: "ccc", want: false},
174+
{prefix: "ddd", want: false},
175+
},
176+
wantReadLineErr: io.EOF,
177+
},
178+
}
179+
180+
for _, tc := range testsPerReadLine {
181+
for _, assert := range tc.nextLine {
182+
got, err := in.nextLineStartsWith(assert.prefix)
183+
if err != nil {
184+
t.Fatalf("nextLineStartsWith returned unexpected error: %s", err)
185+
}
186+
187+
if got != assert.want {
188+
t.Fatalf("unexpected result for prefix %q. got=%t, want=%t", assert.prefix, got, assert.want)
189+
}
190+
}
191+
192+
for _, assert := range tc.nextNextLine {
193+
got, err := in.nextNextLineStartsWith(assert.prefix)
194+
if err != nil {
195+
t.Fatalf("nextLineStartsWith returned unexpected error: %s", err)
196+
}
197+
198+
if got != assert.want {
199+
t.Fatalf("unexpected result for prefix %q. got=%t, want=%t", assert.prefix, got, assert.want)
200+
}
201+
}
202+
203+
_, err := in.readLine()
204+
if err != tc.wantReadLineErr {
205+
t.Fatalf("readLine returned unexpected error. got=%s, want=%s", err, tc.wantReadLineErr)
206+
}
207+
208+
}
209+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
@@ -1,5 +1,5 @@
2+
select 1;
3+
--- this is my query
4+
+++ this is my query
5+
select 2;
6+
select 3;
7+
--- this is the last line
8+
+++ this is the last line

0 commit comments

Comments
 (0)