Skip to content

Commit cf52e70

Browse files
committed
runtime: use a high res timer to signal io completion ports on windows
GetQueuedCompletionStatusEx has a ~16ms timeout resolution. Use a WaitCompletionPacket associated with the I/O Completion Port (IOCP) and a high resolution timer so the IOCP is signaled on timer expiry, therefore improving the GetQueuedCompletionStatusEx timeout resolution. BenchmarkSleep from the time package shows an important improvement: goos: windows goarch: amd64 pkg: time cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ Sleep-12 1258.5µ ± 5% 250.7µ ± 1% -80.08% (p=0.000 n=20) Fixes #44343. Change-Id: I79fc09e34dddfc49e0e23c3d1d0603926c22a11d Reviewed-on: https://go-review.googlesource.com/c/go/+/488675 Reviewed-by: Alex Brainman <[email protected]> Run-TryBot: Quim Muntal <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Bryan Mills <[email protected]> Reviewed-by: Michael Knyszek <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 5c92f43 commit cf52e70

File tree

5 files changed

+154
-30
lines changed

5 files changed

+154
-30
lines changed

Diff for: src/runtime/netpoll_windows.go

+77-14
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ const _INVALID_HANDLE_VALUE = ^uintptr(0)
2121
const (
2222
netpollSourceReady = iota + 1
2323
netpollSourceBreak
24+
netpollSourceTimer
2425
)
2526

2627
const (
@@ -148,31 +149,45 @@ func netpollBreak() {
148149
// delay == 0: does not block, just polls
149150
// delay > 0: block for up to that many nanoseconds
150151
func netpoll(delay int64) (gList, int32) {
152+
if iocphandle == _INVALID_HANDLE_VALUE {
153+
return gList{}, 0
154+
}
155+
151156
var entries [64]overlappedEntry
152-
var wait, n, i uint32
153-
var errno int32
157+
var wait uint32
154158
var toRun gList
155-
156159
mp := getg().m
157160

158-
if iocphandle == _INVALID_HANDLE_VALUE {
159-
return gList{}, 0
161+
if delay >= 1e15 {
162+
// An arbitrary cap on how long to wait for a timer.
163+
// 1e15 ns == ~11.5 days.
164+
delay = 1e15
165+
}
166+
167+
if delay > 0 && mp.waitIocpHandle != 0 {
168+
// GetQueuedCompletionStatusEx doesn't use a high resolution timer internally,
169+
// so we use a separate higher resolution timer associated with a wait completion
170+
// packet to wake up the poller. Note that the completion packet can be delivered
171+
// to another thread, and the Go scheduler expects netpoll to only block up to delay,
172+
// so we still need to use a timeout with GetQueuedCompletionStatusEx.
173+
// TODO: Improve the Go scheduler to support non-blocking timers.
174+
signaled := netpollQueueTimer(delay)
175+
if signaled {
176+
// There is a small window between the SetWaitableTimer and the NtAssociateWaitCompletionPacket
177+
// where the timer can expire. We can return immediately in this case.
178+
return gList{}, 0
179+
}
160180
}
161181
if delay < 0 {
162182
wait = _INFINITE
163183
} else if delay == 0 {
164184
wait = 0
165185
} else if delay < 1e6 {
166186
wait = 1
167-
} else if delay < 1e15 {
168-
wait = uint32(delay / 1e6)
169187
} else {
170-
// An arbitrary cap on how long to wait for a timer.
171-
// 1e9 ms == ~11.5 days.
172-
wait = 1e9
188+
wait = uint32(delay / 1e6)
173189
}
174-
175-
n = uint32(len(entries) / int(gomaxprocs))
190+
n := len(entries) / int(gomaxprocs)
176191
if n < 8 {
177192
n = 8
178193
}
@@ -181,7 +196,7 @@ func netpoll(delay int64) (gList, int32) {
181196
}
182197
if stdcall6(_GetQueuedCompletionStatusEx, iocphandle, uintptr(unsafe.Pointer(&entries[0])), uintptr(n), uintptr(unsafe.Pointer(&n)), uintptr(wait), 0) == 0 {
183198
mp.blocked = false
184-
errno = int32(getlasterror())
199+
errno := getlasterror()
185200
if errno == _WAIT_TIMEOUT {
186201
return gList{}, 0
187202
}
@@ -190,7 +205,7 @@ func netpoll(delay int64) (gList, int32) {
190205
}
191206
mp.blocked = false
192207
delta := int32(0)
193-
for i = 0; i < n; i++ {
208+
for i := 0; i < n; i++ {
194209
e := &entries[i]
195210
switch unpackNetpollSource(e.key) {
196211
case netpollSourceReady:
@@ -212,10 +227,58 @@ func netpoll(delay int64) (gList, int32) {
212227
// Forward the notification to the blocked poller.
213228
netpollBreak()
214229
}
230+
case netpollSourceTimer:
231+
// TODO: We could avoid calling NtCancelWaitCompletionPacket for expired wait completion packets.
215232
default:
216233
println("runtime: GetQueuedCompletionStatusEx returned net_op with invalid key=", e.key)
217234
throw("runtime: netpoll failed")
218235
}
219236
}
220237
return toRun, delta
221238
}
239+
240+
// netpollQueueTimer queues a timer to wake up the poller after the given delay.
241+
// It returns true if the timer expired during this call.
242+
func netpollQueueTimer(delay int64) (signaled bool) {
243+
const (
244+
STATUS_SUCCESS = 0x00000000
245+
STATUS_PENDING = 0x00000103
246+
STATUS_CANCELLED = 0xC0000120
247+
)
248+
mp := getg().m
249+
// A wait completion packet can only be associated with one timer at a time,
250+
// so we need to cancel the previous one if it exists. This wouldn't be necessary
251+
// if the poller would only be woken up by the timer, in which case the association
252+
// would be automatically cancelled, but it can also be woken up by other events,
253+
// such as a netpollBreak, so we can get to this point with a timer that hasn't
254+
// expired yet. In this case, the completion packet can still be picked up by
255+
// another thread, so defer the cancellation until it is really necessary.
256+
errno := stdcall2(_NtCancelWaitCompletionPacket, mp.waitIocpHandle, 1)
257+
switch errno {
258+
case STATUS_CANCELLED:
259+
// STATUS_CANCELLED is returned when the associated timer has already expired,
260+
// which automatically cancels the wait completion packet.
261+
fallthrough
262+
case STATUS_SUCCESS:
263+
dt := -delay / 100 // relative sleep (negative), 100ns units
264+
if stdcall6(_SetWaitableTimer, mp.waitIocpTimer, uintptr(unsafe.Pointer(&dt)), 0, 0, 0, 0) == 0 {
265+
println("runtime: SetWaitableTimer failed; errno=", getlasterror())
266+
throw("runtime: netpoll failed")
267+
}
268+
key := packNetpollKey(netpollSourceTimer, nil)
269+
if errno := stdcall8(_NtAssociateWaitCompletionPacket, mp.waitIocpHandle, iocphandle, mp.waitIocpTimer, key, 0, 0, 0, uintptr(unsafe.Pointer(&signaled))); errno != 0 {
270+
println("runtime: NtAssociateWaitCompletionPacket failed; errno=", errno)
271+
throw("runtime: netpoll failed")
272+
}
273+
case STATUS_PENDING:
274+
// STATUS_PENDING is returned if the wait operation can't be cancelled yet.
275+
// This can happen if this thread was woken up by another event, such as a netpollBreak,
276+
// and the timer expired just while calling NtCancelWaitCompletionPacket, in which case
277+
// this call fails to cancel the association to avoid a race condition.
278+
// This is a rare case, so we can just avoid using the high resolution timer this time.
279+
default:
280+
println("runtime: NtCancelWaitCompletionPacket failed; errno=", errno)
281+
throw("runtime: netpoll failed")
282+
}
283+
return signaled
284+
}

Diff for: src/runtime/nonwindows_stub.go

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ package runtime
1212
// timer precision to keep the timer error acceptable.
1313
const osRelaxMinNS = 0
1414

15+
var haveHighResSleep = true
16+
1517
// osRelax is called by the scheduler when transitioning to and from
1618
// all Ps being idle.
1719
func osRelax(relax bool) {}

Diff for: src/runtime/os_windows.go

+50-4
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,11 @@ var (
131131
// Load ntdll.dll manually during startup, otherwise Mingw
132132
// links wrong printf function to cgo executable (see issue
133133
// 12030 for details).
134-
_RtlGetCurrentPeb stdFunction
135-
_RtlGetNtVersionNumbers stdFunction
134+
_NtCreateWaitCompletionPacket stdFunction
135+
_NtAssociateWaitCompletionPacket stdFunction
136+
_NtCancelWaitCompletionPacket stdFunction
137+
_RtlGetCurrentPeb stdFunction
138+
_RtlGetNtVersionNumbers stdFunction
136139

137140
// These are from non-kernel32.dll, so we prefer to LoadLibraryEx them.
138141
_timeBeginPeriod,
@@ -161,7 +164,9 @@ type mOS struct {
161164
waitsema uintptr // semaphore for parking on locks
162165
resumesema uintptr // semaphore to indicate suspend/resume
163166

164-
highResTimer uintptr // high resolution timer handle used in usleep
167+
highResTimer uintptr // high resolution timer handle used in usleep
168+
waitIocpTimer uintptr // high resolution timer handle used in netpoll
169+
waitIocpHandle uintptr // wait completion handle used in netpoll
165170

166171
// preemptExtLock synchronizes preemptM with entry/exit from
167172
// external C code.
@@ -250,6 +255,18 @@ func loadOptionalSyscalls() {
250255
if n32 == 0 {
251256
throw("ntdll.dll not found")
252257
}
258+
_NtCreateWaitCompletionPacket = windowsFindfunc(n32, []byte("NtCreateWaitCompletionPacket\000"))
259+
if _NtCreateWaitCompletionPacket != nil {
260+
// These functions should exist if NtCreateWaitCompletionPacket exists.
261+
_NtAssociateWaitCompletionPacket = windowsFindfunc(n32, []byte("NtAssociateWaitCompletionPacket\000"))
262+
if _NtAssociateWaitCompletionPacket == nil {
263+
throw("NtCreateWaitCompletionPacket exists but NtAssociateWaitCompletionPacket does not")
264+
}
265+
_NtCancelWaitCompletionPacket = windowsFindfunc(n32, []byte("NtCancelWaitCompletionPacket\000"))
266+
if _NtCancelWaitCompletionPacket == nil {
267+
throw("NtCreateWaitCompletionPacket exists but NtCancelWaitCompletionPacket does not")
268+
}
269+
}
253270
_RtlGetCurrentPeb = windowsFindfunc(n32, []byte("RtlGetCurrentPeb\000"))
254271
_RtlGetNtVersionNumbers = windowsFindfunc(n32, []byte("RtlGetNtVersionNumbers\000"))
255272
}
@@ -374,6 +391,13 @@ func osRelax(relax bool) uint32 {
374391
// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag is available.
375392
var haveHighResTimer = false
376393

394+
// haveHighResSleep indicates that NtCreateWaitCompletionPacket
395+
// exists and haveHighResTimer is true.
396+
// NtCreateWaitCompletionPacket has been available since Windows 10,
397+
// but has just been publicly documented, so some platforms, like Wine,
398+
// don't support it yet.
399+
var haveHighResSleep = false
400+
377401
// createHighResTimer calls CreateWaitableTimerEx with
378402
// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag to create high
379403
// resolution timer. createHighResTimer returns new timer
@@ -397,6 +421,7 @@ func initHighResTimer() {
397421
h := createHighResTimer()
398422
if h != 0 {
399423
haveHighResTimer = true
424+
haveHighResSleep = _NtCreateWaitCompletionPacket != nil
400425
stdcall1(_CloseHandle, h)
401426
} else {
402427
// Only load winmm.dll if we need it.
@@ -797,7 +822,7 @@ func sigblock(exiting bool) {
797822
}
798823

799824
// Called to initialize a new m (including the bootstrap m).
800-
// Called on the new thread, cannot allocate memory.
825+
// Called on the new thread, cannot allocate Go memory.
801826
func minit() {
802827
var thandle uintptr
803828
if stdcall7(_DuplicateHandle, currentProcess, currentThread, currentProcess, uintptr(unsafe.Pointer(&thandle)), 0, 0, _DUPLICATE_SAME_ACCESS) == 0 {
@@ -818,6 +843,19 @@ func minit() {
818843
throw("CreateWaitableTimerEx when creating timer failed")
819844
}
820845
}
846+
if mp.waitIocpHandle == 0 && haveHighResSleep {
847+
mp.waitIocpTimer = createHighResTimer()
848+
if mp.waitIocpTimer == 0 {
849+
print("runtime: CreateWaitableTimerEx failed; errno=", getlasterror(), "\n")
850+
throw("CreateWaitableTimerEx when creating timer failed")
851+
}
852+
const GENERIC_ALL = 0x10000000
853+
errno := stdcall3(_NtCreateWaitCompletionPacket, uintptr(unsafe.Pointer(&mp.waitIocpHandle)), GENERIC_ALL, 0)
854+
if mp.waitIocpHandle == 0 {
855+
print("runtime: NtCreateWaitCompletionPacket failed; errno=", errno, "\n")
856+
throw("NtCreateWaitCompletionPacket failed")
857+
}
858+
}
821859
unlock(&mp.threadLock)
822860

823861
// Query the true stack base from the OS. Currently we're
@@ -872,6 +910,14 @@ func mdestroy(mp *m) {
872910
stdcall1(_CloseHandle, mp.highResTimer)
873911
mp.highResTimer = 0
874912
}
913+
if mp.waitIocpTimer != 0 {
914+
stdcall1(_CloseHandle, mp.waitIocpTimer)
915+
mp.waitIocpTimer = 0
916+
}
917+
if mp.waitIocpHandle != 0 {
918+
stdcall1(_CloseHandle, mp.waitIocpHandle)
919+
mp.waitIocpHandle = 0
920+
}
875921
if mp.waitsema != 0 {
876922
stdcall1(_CloseHandle, mp.waitsema)
877923
mp.waitsema = 0

Diff for: src/time/sleep_test.go

+22-10
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,33 @@ import (
1515
"sync/atomic"
1616
"testing"
1717
. "time"
18+
_ "unsafe" // for go:linkname
1819
)
1920

21+
// haveHighResSleep is true if the system supports at least ~1ms sleeps.
22+
//
23+
//go:linkname haveHighResSleep runtime.haveHighResSleep
24+
var haveHighResSleep bool
25+
26+
// adjustDelay returns an adjusted delay based on the system sleep resolution.
2027
// Go runtime uses different Windows timers for time.Now and sleeping.
2128
// These can tick at different frequencies and can arrive out of sync.
2229
// The effect can be seen, for example, as time.Sleep(100ms) is actually
2330
// shorter than 100ms when measured as difference between time.Now before and
2431
// after time.Sleep call. This was observed on Windows XP SP3 (windows/386).
25-
// windowsInaccuracy is to ignore such errors.
26-
const windowsInaccuracy = 17 * Millisecond
32+
func adjustDelay(t *testing.T, delay Duration) Duration {
33+
if haveHighResSleep {
34+
return delay
35+
}
36+
t.Log("adjusting delay for low resolution sleep")
37+
switch runtime.GOOS {
38+
case "windows":
39+
return delay - 17*Millisecond
40+
default:
41+
t.Fatal("adjustDelay unimplemented on " + runtime.GOOS)
42+
return 0
43+
}
44+
}
2745

2846
func TestSleep(t *testing.T) {
2947
const delay = 100 * Millisecond
@@ -33,10 +51,7 @@ func TestSleep(t *testing.T) {
3351
}()
3452
start := Now()
3553
Sleep(delay)
36-
delayadj := delay
37-
if runtime.GOOS == "windows" {
38-
delayadj -= windowsInaccuracy
39-
}
54+
delayadj := adjustDelay(t, delay)
4055
duration := Now().Sub(start)
4156
if duration < delayadj {
4257
t.Fatalf("Sleep(%s) slept for only %s", delay, duration)
@@ -247,10 +262,7 @@ func TestAfter(t *testing.T) {
247262
const delay = 100 * Millisecond
248263
start := Now()
249264
end := <-After(delay)
250-
delayadj := delay
251-
if runtime.GOOS == "windows" {
252-
delayadj -= windowsInaccuracy
253-
}
265+
delayadj := adjustDelay(t, delay)
254266
if duration := Now().Sub(start); duration < delayadj {
255267
t.Fatalf("After(%s) slept for only %d ns", delay, duration)
256268
}

Diff for: src/time/time.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,9 @@
8181
//
8282
// Timer resolution varies depending on the Go runtime, the operating system
8383
// and the underlying hardware.
84-
// On Unix, the resolution is approximately 1ms.
85-
// On Windows, the default resolution is approximately 16ms, but
84+
// On Unix, the resolution is ~1ms.
85+
// On Windows version 1803 and newer, the resolution is ~0.5ms.
86+
// On older Windows versions, the default resolution is ~16ms, but
8687
// a higher resolution may be requested using [golang.org/x/sys/windows.TimeBeginPeriod].
8788
package time
8889

0 commit comments

Comments
 (0)