Skip to content

Commit 45c6f59

Browse files
committed
runtime: use two-level list for semaphore address search in semaRoot
If there are many goroutines contending for two different locks and both locks hash to the same semaRoot, the scans to find the goroutines for a particular lock can end up being O(n), making n lock acquisitions quadratic. As long as only one actively-used lock hashes to each semaRoot there's no problem, since the list operations in that case are O(1). But when the second actively-used lock hits the same semaRoot, then scans for entries with for a given lock have to scan over the entries for the other lock. Fix this problem by changing the semaRoot to hold only one sudog per unique address. In the running example, this drops the length of that list from O(n) to 2. Then attach other goroutines waiting on the same address to a separate list headed by the sudog in the semaRoot list. Those "same address list" operations are still O(1), so now the example from above works much better. There is still an assumption here that in real programs you don't have many many goroutines queueing up on many many distinct addresses. If we end up with that problem, we can replace the top-level list with a treap. Fixes #17953. Change-Id: I78c5b1a5053845275ab31686038aa4f6db5720b2 Reviewed-on: https://go-review.googlesource.com/36792 Run-TryBot: Russ Cox <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent 93a18ac commit 45c6f59

File tree

2 files changed

+90
-25
lines changed

2 files changed

+90
-25
lines changed

src/runtime/runtime2.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ type gobuf struct {
270270
type sudog struct {
271271
// The following fields are protected by the hchan.lock of the
272272
// channel this sudog is blocking on. shrinkstack depends on
273-
// this.
273+
// this for sudogs involved in channel ops.
274274

275275
g *g
276276
selectdone *uint32 // CAS to 1 to win select race (may point to stack)
@@ -279,12 +279,15 @@ type sudog struct {
279279
elem unsafe.Pointer // data element (may point to stack)
280280

281281
// The following fields are never accessed concurrently.
282-
// waitlink is only accessed by g.
282+
// For channels, waitlink is only accessed by g.
283+
// For semaphores, all fields (including the ones above)
284+
// are only accessed when holding a semaRoot lock.
283285

284286
acquiretime int64
285287
releasetime int64
286288
ticket uint32
287-
waitlink *sudog // g.waiting list
289+
waitlink *sudog // g.waiting list or semaRoot
290+
waittail *sudog // semaRoot
288291
c *hchan // channel
289292
}
290293

src/runtime/sema.go

Lines changed: 84 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,20 @@ import (
2727

2828
// Asynchronous semaphore for sync.Mutex.
2929

30+
// A semaRoot holds a linked list of sudog with distinct addresses (s.elem).
31+
// Each of those sudog may in turn point (through s.waitlink) to a list
32+
// of other sudogs waiting on the same address.
33+
// The operations on the inner lists of sudogs with the same address
34+
// are all O(1). Only the scanning of the top-level semaRoot list is O(n),
35+
// where n is the number of distinct addresses with goroutines blocked
36+
// on them that hash to the given semaRoot.
37+
// In systems with many goroutines, most queue up on a few addresses,
38+
// so the linear search across unique addresses is probably OK.
39+
// At least, we'll use this until it's not.
40+
// The next step is probably to make the top-level list a treap instead
41+
// of a linked list.
42+
// See golang.org/issue/17953 for a program that worked badly
43+
// before we introduced the second level of list.
3044
type semaRoot struct {
3145
lock mutex
3246
head *sudog
@@ -157,22 +171,10 @@ func semrelease(addr *uint32) {
157171
unlock(&root.lock)
158172
return
159173
}
160-
s := root.head
161-
for ; s != nil; s = s.next {
162-
if s.elem == unsafe.Pointer(addr) {
163-
atomic.Xadd(&root.nwait, -1)
164-
root.dequeue(s)
165-
break
166-
}
167-
}
174+
s, t0 := root.dequeue(addr)
168175
if s != nil {
176+
atomic.Xadd(&root.nwait, -1)
169177
if s.acquiretime != 0 {
170-
t0 := cputicks()
171-
for x := root.head; x != nil; x = x.next {
172-
if x.elem == unsafe.Pointer(addr) {
173-
x.acquiretime = t0
174-
}
175-
}
176178
mutexevent(t0-s.acquiretime, 3)
177179
}
178180
}
@@ -198,9 +200,26 @@ func cansemacquire(addr *uint32) bool {
198200
}
199201
}
200202

203+
// queue adds s to the blocked goroutines in semaRoot.
201204
func (root *semaRoot) queue(addr *uint32, s *sudog) {
202205
s.g = getg()
203206
s.elem = unsafe.Pointer(addr)
207+
208+
for t := root.head; t != nil; t = t.next {
209+
if t.elem == unsafe.Pointer(addr) {
210+
// Already have addr in list; add s to end of per-addr list.
211+
if t.waittail == nil {
212+
t.waitlink = s
213+
} else {
214+
t.waittail.waitlink = s
215+
}
216+
t.waittail = s
217+
s.waitlink = nil
218+
return
219+
}
220+
}
221+
222+
// Add s as new entry in list of unique addrs.
204223
s.next = nil
205224
s.prev = root.tail
206225
if root.tail != nil {
@@ -211,20 +230,63 @@ func (root *semaRoot) queue(addr *uint32, s *sudog) {
211230
root.tail = s
212231
}
213232

214-
func (root *semaRoot) dequeue(s *sudog) {
215-
if s.next != nil {
216-
s.next.prev = s.prev
217-
} else {
218-
root.tail = s.prev
233+
// dequeue searches for and finds the first goroutine
234+
// in semaRoot blocked on addr.
235+
// If the sudog was being profiled, dequeue returns the time
236+
// at which it was woken up as now. Otherwise now is 0.
237+
func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now int64) {
238+
s := root.head
239+
for ; s != nil; s = s.next {
240+
if s.elem == unsafe.Pointer(addr) {
241+
goto Found
242+
}
219243
}
220-
if s.prev != nil {
221-
s.prev.next = s.next
244+
return nil, 0
245+
246+
Found:
247+
now = int64(0)
248+
if s.acquiretime != 0 {
249+
now = cputicks()
250+
}
251+
if t := s.waitlink; t != nil {
252+
// Substitute t, also waiting on addr, for s in root list of unique addrs.
253+
t.prev = s.prev
254+
t.next = s.next
255+
if t.prev != nil {
256+
t.prev.next = t
257+
} else {
258+
root.head = t
259+
}
260+
if t.next != nil {
261+
t.next.prev = t
262+
} else {
263+
root.tail = t
264+
}
265+
if t.waitlink != nil {
266+
t.waittail = s.waittail
267+
} else {
268+
t.waittail = nil
269+
}
270+
t.acquiretime = now
271+
s.waitlink = nil
272+
s.waittail = nil
222273
} else {
223-
root.head = s.next
274+
// Remove s from list.
275+
if s.next != nil {
276+
s.next.prev = s.prev
277+
} else {
278+
root.tail = s.prev
279+
}
280+
if s.prev != nil {
281+
s.prev.next = s.next
282+
} else {
283+
root.head = s.next
284+
}
224285
}
225286
s.elem = nil
226287
s.next = nil
227288
s.prev = nil
289+
return s, now
228290
}
229291

230292
// notifyList is a ticket-based notification list used to implement sync.Cond.

0 commit comments

Comments
 (0)