Skip to content

Commit 4513f1a

Browse files
committed
internal/trace/v2: correctly handle a broken spilled batch
Currently if the first batch of the next generation in the trace is broken, then the previous generation will fail to parse. The parser currently relies on one complete batch of the next generation to continue. However, this means that recovering a complete generation from a trace with a broken tail doesn't always work. Luckily, this is fixable. When the parser encounters an error reading a batch in a generation, it simply writes down that error and processes it later, once the generation has been handled. If it turns out the error was for the same generation and something bigger is broken, then the parser will catch that sooner when validating the generation's events and the error will never show up. Otherwise, the generation will parse through successfully and we'll emit the error once that's done. Fixes #55160. Change-Id: I9c9c19d5bb163c5225e18d11594ca2a8793c6950 Reviewed-on: https://go-review.googlesource.com/c/go/+/584275 Reviewed-by: Michael Pratt <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent ff743ce commit 4513f1a

File tree

4 files changed

+72
-2
lines changed

4 files changed

+72
-2
lines changed

src/internal/trace/v2/generation.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ type spilledBatch struct {
4141
// out of r. spill is the first batch of the new generation (already buffered and
4242
// parsed from reading the last generation). Returns the generation and the first
4343
// batch read of the next generation, if any.
44+
//
45+
// If gen is non-nil, it is valid and must be processed before handling the returned
46+
// error.
4447
func readGeneration(r *bufio.Reader, spill *spilledBatch) (*generation, *spilledBatch, error) {
4548
g := &generation{
4649
evTable: &evTable{
@@ -58,12 +61,20 @@ func readGeneration(r *bufio.Reader, spill *spilledBatch) (*generation, *spilled
5861
}
5962
// Read batches one at a time until we either hit EOF or
6063
// the next generation.
64+
var spillErr error
6165
for {
6266
b, gen, err := readBatch(r)
6367
if err == io.EOF {
6468
break
6569
}
6670
if err != nil {
71+
if g.gen != 0 {
72+
// This is an error reading the first batch of the next generation.
73+
// This is fine. Let's forge ahead assuming that what we've got so
74+
// far is fine.
75+
spillErr = err
76+
break
77+
}
6778
return nil, nil, err
6879
}
6980
if gen == 0 {
@@ -121,7 +132,7 @@ func readGeneration(r *bufio.Reader, spill *spilledBatch) (*generation, *spilled
121132
slices.SortFunc(g.cpuSamples, func(a, b cpuSample) int {
122133
return cmp.Compare(a.time, b.time)
123134
})
124-
return g, spill, nil
135+
return g, spill, spillErr
125136
}
126137

127138
// processBatch adds the batch to the generation.

src/internal/trace/v2/reader.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ type Reader struct {
2222
lastTs Time
2323
gen *generation
2424
spill *spilledBatch
25+
spillErr error // error from reading spill
2526
frontier []*batchCursor
2627
cpuSamples []cpuSample
2728
order ordering
@@ -118,6 +119,9 @@ func (r *Reader) ReadEvent() (e Event, err error) {
118119
r.emittedSync = true
119120
return syncEvent(r.gen.evTable, r.lastTs), nil
120121
}
122+
if r.spillErr != nil {
123+
return Event{}, r.spillErr
124+
}
121125
if r.gen != nil && r.spill == nil {
122126
// If we have a generation from the last read,
123127
// and there's nothing left in the frontier, and
@@ -127,10 +131,12 @@ func (r *Reader) ReadEvent() (e Event, err error) {
127131
return Event{}, io.EOF
128132
}
129133
// Read the next generation.
134+
var err error
130135
r.gen, r.spill, err = readGeneration(r.r, r.spill)
131-
if err != nil {
136+
if r.gen == nil {
132137
return Event{}, err
133138
}
139+
r.spillErr = err
134140

135141
// Reset CPU samples cursor.
136142
r.cpuSamples = r.gen.cpuSamples
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright 2023 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Regression test for #55160.
6+
//
7+
// The issue is that the parser reads ahead to the first batch of the
8+
// next generation to find generation boundaries, but if it finds an
9+
// error, it needs to delay handling that error until later. Previously
10+
// it would handle that error immediately and a totally valid generation
11+
// would be skipped for parsing and rejected because of an error in a
12+
// batch in the following generation.
13+
//
14+
// This test captures this behavior by making both the first generation
15+
// and second generation bad. It requires that the issue in the first
16+
// generation, which is caught when actually ordering events, be reported
17+
// instead of the second one.
18+
19+
package main
20+
21+
import (
22+
"internal/trace/v2/event/go122"
23+
testgen "internal/trace/v2/internal/testgen/go122"
24+
)
25+
26+
func main() {
27+
testgen.Main(gen)
28+
}
29+
30+
func gen(t *testgen.Trace) {
31+
// A running goroutine emits a task begin.
32+
t.RawEvent(go122.EvEventBatch, nil, 1 /*gen*/, 0 /*thread ID*/, 0 /*timestamp*/, 5 /*batch length*/)
33+
t.RawEvent(go122.EvFrequency, nil, 15625000)
34+
35+
// A running goroutine emits a task begin.
36+
t.RawEvent(go122.EvEventBatch, nil, 1 /*gen*/, 0 /*thread ID*/, 0 /*timestamp*/, 5 /*batch length*/)
37+
t.RawEvent(go122.EvGoCreate, nil, 0 /*timestamp delta*/, 1 /*go ID*/, 0, 0)
38+
39+
// Write an invalid batch event for the next generation.
40+
t.RawEvent(go122.EvEventBatch, nil, 2 /*gen*/, 0 /*thread ID*/, 0 /*timestamp*/, 50 /*batch length (invalid)*/)
41+
42+
// We should fail at the first issue, not the second one.
43+
t.ExpectFailure("expected a proc but didn't have one")
44+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
-- expect --
2+
FAILURE "expected a proc but didn't have one"
3+
-- trace --
4+
Trace Go1.22
5+
EventBatch gen=1 m=0 time=0 size=5
6+
Frequency freq=15625000
7+
EventBatch gen=1 m=0 time=0 size=5
8+
GoCreate dt=0 new_g=1 new_stack=0 stack=0
9+
EventBatch gen=2 m=0 time=0 size=50

0 commit comments

Comments
 (0)