Skip to content

Commit 2595fe7

Browse files
committed
runtime: don't start new threads from locked threads
Applications that need to manipulate kernel thread state are currently on thin ice in Go: they can use LockOSThread to prevent other goroutines from running on the manipulated thread, but Go may clone this manipulated state into a new thread that's put into the runtime's thread pool along with other threads. Fix this by never starting a new thread from a locked thread or a thread that may have been started by C. Instead, the runtime starts a "template thread" with a known-good state. If it then needs to start a new thread but doesn't know that the current thread is in a good state, it forwards the thread creation to the template thread. Fixes #20676. Change-Id: I798137a56e04b7723d55997e9c5c085d1d910643 Reviewed-on: https://go-review.googlesource.com/46033 Run-TryBot: Austin Clements <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Keith Randall <[email protected]>
1 parent 9a9780a commit 2595fe7

File tree

2 files changed

+115
-3
lines changed

2 files changed

+115
-3
lines changed

src/runtime/proc.go

Lines changed: 114 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,9 @@ func main() {
173173
if _cgo_notify_runtime_init_done == nil {
174174
throw("_cgo_notify_runtime_init_done missing")
175175
}
176+
// Start the template thread in case we enter Go from
177+
// a C-created thread and need to create a new thread.
178+
startTemplateThread()
176179
cgocall(_cgo_notify_runtime_init_done, nil)
177180
}
178181

@@ -1630,6 +1633,27 @@ func unlockextra(mp *m) {
16301633
// around exec'ing while creating/destroying threads. See issue #19546.
16311634
var execLock rwmutex
16321635

1636+
// newmHandoff contains a list of m structures that need new OS threads.
1637+
// This is used by newm in situations where newm itself can't safely
1638+
// start an OS thread.
1639+
var newmHandoff struct {
1640+
lock mutex
1641+
1642+
// newm points to a list of M structures that need new OS
1643+
// threads. The list is linked through m.schedlink.
1644+
newm muintptr
1645+
1646+
// waiting indicates that wake needs to be notified when an m
1647+
// is put on the list.
1648+
waiting bool
1649+
wake note
1650+
1651+
// haveTemplateThread indicates that the templateThread has
1652+
// been started. This is not protected by lock. Use cas to set
1653+
// to 1.
1654+
haveTemplateThread uint32
1655+
}
1656+
16331657
// Create a new m. It will start off with a call to fn, or else the scheduler.
16341658
// fn needs to be static and not a heap allocated closure.
16351659
// May run with m.p==nil, so write barriers are not allowed.
@@ -1638,6 +1662,33 @@ func newm(fn func(), _p_ *p) {
16381662
mp := allocm(_p_, fn)
16391663
mp.nextp.set(_p_)
16401664
mp.sigmask = initSigmask
1665+
if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) {
1666+
// We're on a locked M or a thread that may have been
1667+
// started by C. The kernel state of this thread may
1668+
// be strange (the user may have locked it for that
1669+
// purpose). We don't want to clone that into another
1670+
// thread. Instead, ask a known-good thread to create
1671+
// the thread for us.
1672+
//
1673+
// TODO: This may be unnecessary on Windows, which
1674+
// doesn't model thread creation off fork.
1675+
lock(&newmHandoff.lock)
1676+
if newmHandoff.haveTemplateThread == 0 {
1677+
throw("on a locked thread with no template thread")
1678+
}
1679+
mp.schedlink = newmHandoff.newm
1680+
newmHandoff.newm.set(mp)
1681+
if newmHandoff.waiting {
1682+
newmHandoff.waiting = false
1683+
notewakeup(&newmHandoff.wake)
1684+
}
1685+
unlock(&newmHandoff.lock)
1686+
return
1687+
}
1688+
newm1(mp)
1689+
}
1690+
1691+
func newm1(mp *m) {
16411692
if iscgo {
16421693
var ts cgothreadstart
16431694
if _cgo_thread_start == nil {
@@ -1659,6 +1710,56 @@ func newm(fn func(), _p_ *p) {
16591710
execLock.runlock()
16601711
}
16611712

1713+
// startTemplateThread starts the template thread if it is not already
1714+
// running.
1715+
//
1716+
// The calling thread must itself be in a known-good state.
1717+
func startTemplateThread() {
1718+
if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) {
1719+
return
1720+
}
1721+
newm(templateThread, nil)
1722+
}
1723+
1724+
// templateThread is a thread in a known-good state that exists solely
1725+
// to start new threads in known-good states when the calling thread
1726+
// may not be in a good state.
1727+
//
1728+
// Many programs never need this, so templateThread is started lazily
1729+
// when we first enter a state that might lead to running on a thread
1730+
// in an unknown state.
1731+
//
1732+
// templateThread runs on an M without a P, so it must not have write
1733+
// barriers.
1734+
//
1735+
//go:nowritebarrierrec
1736+
func templateThread() {
1737+
lock(&sched.lock)
1738+
sched.nmsys++
1739+
checkdead()
1740+
unlock(&sched.lock)
1741+
1742+
for {
1743+
lock(&newmHandoff.lock)
1744+
for newmHandoff.newm != 0 {
1745+
newm := newmHandoff.newm.ptr()
1746+
newmHandoff.newm = 0
1747+
unlock(&newmHandoff.lock)
1748+
for newm != nil {
1749+
next := newm.schedlink.ptr()
1750+
newm.schedlink = 0
1751+
newm1(newm)
1752+
newm = next
1753+
}
1754+
lock(&newmHandoff.lock)
1755+
}
1756+
newmHandoff.waiting = true
1757+
noteclear(&newmHandoff.wake)
1758+
unlock(&newmHandoff.lock)
1759+
notesleep(&newmHandoff.wake)
1760+
}
1761+
}
1762+
16621763
// Stops execution of the current m until new work is available.
16631764
// Returns with acquired P.
16641765
func stopm() {
@@ -3176,6 +3277,12 @@ func dolockOSThread() {
31763277
// until the calling goroutine exits or has made as many calls to
31773278
// UnlockOSThread as to LockOSThread.
31783279
func LockOSThread() {
3280+
if atomic.Load(&newmHandoff.haveTemplateThread) == 0 {
3281+
// If we need to start a new thread from the locked
3282+
// thread, we need the template thread. Start it now
3283+
// while we're in a known-good state.
3284+
startTemplateThread()
3285+
}
31793286
_g_ := getg()
31803287
_g_.m.lockedExt++
31813288
if _g_.m.lockedExt == 0 {
@@ -3790,13 +3897,12 @@ func checkdead() {
37903897
return
37913898
}
37923899

3793-
// -1 for sysmon
3794-
run := sched.mcount - sched.nmidle - sched.nmidlelocked - 1
3900+
run := sched.mcount - sched.nmidle - sched.nmidlelocked - sched.nmsys
37953901
if run > 0 {
37963902
return
37973903
}
37983904
if run < 0 {
3799-
print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, "\n")
3905+
print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, " nmsys=", sched.nmsys, "\n")
38003906
throw("checkdead: inconsistent counts")
38013907
}
38023908

@@ -3859,6 +3965,11 @@ var forcegcperiod int64 = 2 * 60 * 1e9
38593965
//
38603966
//go:nowritebarrierrec
38613967
func sysmon() {
3968+
lock(&sched.lock)
3969+
sched.nmsys++
3970+
checkdead()
3971+
unlock(&sched.lock)
3972+
38623973
// If a heap span goes unused for 5 minutes after a garbage collection,
38633974
// we hand it back to the operating system.
38643975
scavengelimit := int64(5 * 60 * 1e9)

src/runtime/runtime2.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,7 @@ type schedt struct {
533533
nmidlelocked int32 // number of locked m's waiting for work
534534
mcount int32 // number of m's that have been created
535535
maxmcount int32 // maximum number of m's allowed (or die)
536+
nmsys int32 // number of system m's not counted for deadlock
536537

537538
ngsys uint32 // number of system goroutines; updated atomically
538539

0 commit comments

Comments
 (0)