Skip to content

Commit 7b1f8b6

Browse files
rhyshmknyszek
authored andcommitted
runtime: prefer curg for execution trace profile
The CPU profiler adds goroutine labels to its samples based on getg().m.curg. That allows the profile to correctly attribute work that the runtime does on behalf of that goroutine on the M's g0 stack via systemstack calls, such as using runtime.Callers to record the call stack. Those labels also cover work on the g0 stack via mcall. When the active goroutine calls runtime.Gosched, it will receive attribution of its share of the scheduler work necessary to find the next runnable goroutine. The execution tracer's attribution of CPU samples to specific goroutines should match. When curg is set, attribute the CPU samples to that goroutine's ID. Fixes #52693 Change-Id: Ic9af92e153abd8477559e48bc8ebaf3739527b94 Reviewed-on: https://go-review.googlesource.com/c/go/+/404055 Reviewed-by: Michael Knyszek <[email protected]> Reviewed-by: Bryan Mills <[email protected]> Run-TryBot: Rhys Hiltner <[email protected]> TryBot-Result: Gopher Robot <[email protected]>
1 parent fdb640b commit 7b1f8b6

File tree

2 files changed

+53
-16
lines changed

2 files changed

+53
-16
lines changed

src/runtime/proc.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4596,11 +4596,15 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
45964596
}
45974597
cpuprof.add(tagPtr, stk[:n])
45984598

4599+
gprof := gp
45994600
var pp *p
46004601
if gp != nil && gp.m != nil {
4602+
if gp.m.curg != nil {
4603+
gprof = gp.m.curg
4604+
}
46014605
pp = gp.m.p.ptr()
46024606
}
4603-
traceCPUSample(gp, pp, stk[:n])
4607+
traceCPUSample(gprof, pp, stk[:n])
46044608
}
46054609
getg().m.mallocing--
46064610
}

src/runtime/trace/trace_test.go

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -612,8 +612,9 @@ func TestTraceCPUProfile(t *testing.T) {
612612
pprof.Do(ctx, pprof.Labels("tracing", "on"), func(ctx context.Context) {
613613
cpuHogger(cpuHog1, &salt1, dur)
614614
})
615-
// Be sure the execution trace's view, when filtered to this goroutine,
616-
// gets many more samples than the CPU profiler when filtered by label.
615+
// Be sure the execution trace's view, when filtered to this goroutine
616+
// via the explicit goroutine ID in each event, gets many more samples
617+
// than the CPU profiler when filtered to this goroutine via labels.
617618
cpuHogger(cpuHog1, &salt1, dur)
618619
}()
619620

@@ -625,8 +626,12 @@ func TestTraceCPUProfile(t *testing.T) {
625626
if err != nil {
626627
t.Fatalf("failed to parse CPU profile: %v", err)
627628
}
629+
// Examine the CPU profiler's view. Filter it to only include samples from
630+
// the single test goroutine. Use labels to execute that filter: they should
631+
// apply to all work done while that goroutine is getg().m.curg, and they
632+
// should apply to no other goroutines.
628633
pprofSamples := 0
629-
pprofStacks := make(map[string]int) // CPU profiler's view, filtered to include the label
634+
pprofStacks := make(map[string]int)
630635
for _, s := range prof.Sample {
631636
if s.Label["tracing"] != nil {
632637
samples := int(s.Value[0])
@@ -645,8 +650,13 @@ func TestTraceCPUProfile(t *testing.T) {
645650
t.Skipf("CPU profile did not include any samples while tracing was active\n%s", prof)
646651
}
647652

653+
// Examine the execution tracer's view of the CPU profile samples. Filter it
654+
// to only include samples from the single test goroutine. Use the goroutine
655+
// ID that was recorded in the events: that should reflect getg().m.curg,
656+
// same as the profiler's labels (even when the M is using its g0 stack).
657+
totalTraceSamples := 0
648658
traceSamples := 0
649-
traceStacks := make(map[string]int) // Execution tracer's view, filtered to this goroutine
659+
traceStacks := make(map[string]int)
650660
events, _ := parseTrace(t, buf)
651661
var hogRegion *trace.Event
652662
for _, ev := range events {
@@ -661,32 +671,51 @@ func TestTraceCPUProfile(t *testing.T) {
661671
t.Fatalf("execution trace did not close cpuHogger region")
662672
}
663673
for _, ev := range events {
664-
if ev.Type == trace.EvCPUSample && ev.G == hogRegion.G {
665-
traceSamples++
666-
var fns []string
667-
for _, frame := range ev.Stk {
668-
if frame.Fn != "runtime.goexit" {
669-
fns = append(fns, fmt.Sprintf("%s:%d", frame.Fn, frame.Line))
674+
if ev.Type == trace.EvCPUSample {
675+
totalTraceSamples++
676+
if ev.G == hogRegion.G {
677+
traceSamples++
678+
var fns []string
679+
for _, frame := range ev.Stk {
680+
if frame.Fn != "runtime.goexit" {
681+
fns = append(fns, fmt.Sprintf("%s:%d", frame.Fn, frame.Line))
682+
}
670683
}
684+
stack := strings.Join(fns, " ")
685+
traceStacks[stack]++
671686
}
672-
stack := strings.Join(fns, " ")
673-
traceStacks[stack]++
674687
}
675688
}
689+
690+
// The execution trace may drop CPU profile samples if the profiling buffer
691+
// overflows. Based on the size of profBufWordCount, that takes a bit over
692+
// 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've
693+
// hit that case, then we definitely have at least one full buffer's worth
694+
// of CPU samples, so we'll call that success.
695+
overflowed := totalTraceSamples >= 1900
676696
if traceSamples < pprofSamples {
677-
t.Errorf("exectution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples)
697+
t.Logf("exectution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples)
698+
if !overflowed {
699+
t.Fail()
700+
}
678701
}
679702

680703
for stack, traceSamples := range traceStacks {
681704
pprofSamples := pprofStacks[stack]
682705
delete(pprofStacks, stack)
683706
if traceSamples < pprofSamples {
684-
t.Errorf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace",
707+
t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace",
685708
stack, pprofSamples, traceSamples)
709+
if !overflowed {
710+
t.Fail()
711+
}
686712
}
687713
}
688714
for stack, pprofSamples := range pprofStacks {
689-
t.Errorf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack)
715+
t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack)
716+
if !overflowed {
717+
t.Fail()
718+
}
690719
}
691720

692721
if t.Failed() {
@@ -726,6 +755,10 @@ func cpuHog1(x int) int {
726755
func cpuHog0(x, n int) int {
727756
foo := x
728757
for i := 0; i < n; i++ {
758+
if i%1000 == 0 {
759+
// Spend time in mcall, stored as gp.m.curg, with g0 running
760+
runtime.Gosched()
761+
}
729762
if foo > 0 {
730763
foo *= foo
731764
} else {

0 commit comments

Comments
 (0)