Skip to content

Commit 6382893

Browse files
committed
math/rand/v2: add ChaCha8
ChaCha8 provides a cryptographically strong generator alongside PCG, so that people who want stronger randomness have access to that. On systems with 128-bit vector math assembly (amd64 and arm64), ChaCha8 runs at about the same speed as PCG (25% slower on amd64, 2% faster on arm64). Obviously all the claimed benchmark variation other than the new ChaCha8 benchmark is a lie. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ afa459a2f0.amd64 │ bbb48afeb7.amd64 │ │ sec/op │ sec/op vs base │ PCG_DXSM-32 1.488n ± 2% 1.492n ± 2% ~ (p=0.309 n=20) ChaCha8-32 1.861n ± 2% SourceUint64-32 1.450n ± 3% 1.590n ± 2% +9.69% (p=0.000 n=20) GlobalInt64-32 2.067n ± 2% 2.061n ± 1% ~ (p=0.952 n=20) GlobalInt64Parallel-32 0.1044n ± 2% 0.1041n ± 1% ~ (p=0.498 n=20) GlobalUint64-32 2.085n ± 0% 2.256n ± 2% +8.23% (p=0.000 n=20) GlobalUint64Parallel-32 0.1008n ± 1% 0.1018n ± 1% ~ (p=0.041 n=20) Int64-32 1.779n ± 1% 1.779n ± 1% ~ (p=0.410 n=20) Uint64-32 1.854n ± 2% 1.882n ± 1% ~ (p=0.044 n=20) GlobalIntN1000-32 3.140n ± 3% 3.115n ± 3% ~ (p=0.673 n=20) IntN1000-32 2.496n ± 1% 2.509n ± 1% ~ (p=0.171 n=20) Int64N1000-32 2.510n ± 2% 2.493n ± 1% ~ (p=0.804 n=20) Int64N1e8-32 2.471n ± 2% 2.521n ± 1% +1.98% (p=0.003 n=20) Int64N1e9-32 2.488n ± 2% 2.506n ± 1% ~ (p=0.663 n=20) Int64N2e9-32 2.478n ± 2% 2.482n ± 2% ~ (p=0.533 n=20) Int64N1e18-32 3.088n ± 1% 3.216n ± 1% +4.15% (p=0.000 n=20) Int64N2e18-32 3.493n ± 1% 3.635n ± 2% +4.05% (p=0.000 n=20) Int64N4e18-32 5.060n ± 2% 5.122n ± 1% +1.22% (p=0.000 n=20) Int32N1000-32 2.620n ± 1% 2.672n ± 1% +2.00% (p=0.002 n=20) Int32N1e8-32 2.652n ± 0% 2.646n ± 1% ~ (p=0.743 n=20) Int32N1e9-32 2.644n ± 1% 2.660n ± 2% ~ (p=0.163 n=20) Int32N2e9-32 2.619n ± 2% 2.652n ± 1% ~ (p=0.132 n=20) Float32-32 2.261n ± 1% 2.267n ± 1% ~ (p=0.516 n=20) Float64-32 2.241n ± 2% 2.276n ± 1% ~ (p=0.080 n=20) ExpFloat64-32 3.716n ± 1% 3.779n ± 1% +1.68% (p=0.007 n=20) NormFloat64-32 3.718n ± 1% 3.747n ± 1% ~ (p=0.011 n=20) Perm3-32 34.11n ± 2% 
34.23n ± 2% ~ (p=0.779 n=20) Perm30-32 200.6n ± 0% 202.3n ± 2% ~ (p=0.055 n=20) Perm30ViaShuffle-32 109.7n ± 1% 115.5n ± 2% +5.34% (p=0.000 n=20) ShuffleOverhead-32 107.2n ± 1% 113.3n ± 1% +5.74% (p=0.000 n=20) Concurrent-32 2.108n ± 6% 2.107n ± 1% ~ (p=0.448 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ afa459a2f0.arm64 │ bbb48afeb7.arm64 │ │ sec/op │ sec/op vs base │ PCG_DXSM-8 2.531n ± 0% 2.529n ± 0% ~ (p=0.586 n=20) ChaCha8-8 2.480n ± 0% SourceUint64-8 2.531n ± 0% 2.534n ± 0% ~ (p=0.227 n=20) GlobalInt64-8 2.177n ± 1% 2.173n ± 1% ~ (p=0.733 n=20) GlobalInt64Parallel-8 0.4319n ± 0% 0.4304n ± 0% -0.32% (p=0.003 n=20) GlobalUint64-8 2.185n ± 1% 2.185n ± 0% ~ (p=0.541 n=20) GlobalUint64Parallel-8 0.4295n ± 1% 0.4294n ± 0% ~ (p=0.203 n=20) Int64-8 4.104n ± 0% 4.107n ± 0% ~ (p=0.193 n=20) Uint64-8 4.080n ± 0% 4.081n ± 0% ~ (p=0.053 n=20) GlobalIntN1000-8 2.814n ± 1% 2.814n ± 0% ~ (p=0.879 n=20) IntN1000-8 4.140n ± 0% 4.141n ± 0% ~ (p=0.428 n=20) Int64N1000-8 4.139n ± 0% 4.140n ± 0% ~ (p=0.114 n=20) Int64N1e8-8 4.140n ± 0% 4.140n ± 0% ~ (p=0.898 n=20) Int64N1e9-8 4.139n ± 0% 4.140n ± 0% ~ (p=0.593 n=20) Int64N2e9-8 4.140n ± 0% 4.139n ± 0% ~ (p=0.158 n=20) Int64N1e18-8 5.273n ± 0% 5.274n ± 0% ~ (p=0.308 n=20) Int64N2e18-8 6.059n ± 0% 6.058n ± 0% ~ (p=0.053 n=20) Int64N4e18-8 8.803n ± 0% 8.800n ± 0% ~ (p=0.673 n=20) Int32N1000-8 4.131n ± 0% 4.131n ± 0% ~ (p=0.342 n=20) Int32N1e8-8 4.131n ± 0% 4.131n ± 0% ~ (p=0.091 n=20) Int32N1e9-8 4.131n ± 0% 4.131n ± 0% ~ (p=0.273 n=20) Int32N2e9-8 4.131n ± 0% 4.131n ± 0% ~ (p=0.425 n=20) Float32-8 4.110n ± 0% 4.112n ± 0% ~ (p=0.203 n=20) Float64-8 4.104n ± 0% 4.106n ± 0% ~ (p=0.409 n=20) ExpFloat64-8 5.338n ± 0% 5.339n ± 0% ~ (p=0.037 n=20) NormFloat64-8 5.731n ± 0% 5.733n ± 0% ~ (p=0.692 n=20) Perm3-8 26.62n ± 0% 26.65n ± 0% +0.09% (p=0.000 n=20) Perm30-8 194.6n ± 2% 194.9n ± 0% ~ (p=0.141 n=20) Perm30ViaShuffle-8 156.4n ± 0% 156.5n ± 0% +0.06% (p=0.000 n=20) ShuffleOverhead-8 125.8n ± 0% 125.0n ± 0% -0.64% 
(p=0.000 n=20) Concurrent-8 2.654n ± 6% 2.441n ± 6% -8.06% (p=0.009 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ afa459a2f0.386 │ bbb48afeb7.386 │ │ sec/op │ sec/op vs base │ PCG_DXSM-32 7.793n ± 2% 7.647n ± 1% ~ (p=0.021 n=20) ChaCha8-32 11.48n ± 2% SourceUint64-32 7.680n ± 1% 7.714n ± 1% ~ (p=0.713 n=20) GlobalInt64-32 3.474n ± 3% 3.491n ± 28% ~ (p=0.337 n=20) GlobalInt64Parallel-32 0.3253n ± 0% 0.3194n ± 0% -1.81% (p=0.000 n=20) GlobalUint64-32 3.433n ± 2% 3.610n ± 2% +5.14% (p=0.000 n=20) GlobalUint64Parallel-32 0.3156n ± 0% 0.3164n ± 0% ~ (p=0.073 n=20) Int64-32 7.707n ± 1% 7.824n ± 0% +1.52% (p=0.005 n=20) Uint64-32 7.714n ± 1% 7.732n ± 2% ~ (p=0.441 n=20) GlobalIntN1000-32 6.236n ± 1% 6.176n ± 2% ~ (p=0.499 n=20) IntN1000-32 10.41n ± 1% 10.31n ± 2% ~ (p=0.782 n=20) Int64N1000-32 10.97n ± 2% 11.22n ± 2% +2.19% (p=0.002 n=20) Int64N1e8-32 10.98n ± 1% 11.07n ± 1% ~ (p=0.056 n=20) Int64N1e9-32 10.95n ± 0% 11.15n ± 2% ~ (p=0.016 n=20) Int64N2e9-32 11.11n ± 1% 11.00n ± 1% ~ (p=0.654 n=20) Int64N1e18-32 15.18n ± 2% 14.97n ± 2% ~ (p=0.387 n=20) Int64N2e18-32 15.61n ± 1% 15.91n ± 1% +1.92% (p=0.003 n=20) Int64N4e18-32 19.23n ± 2% 18.98n ± 1% ~ (p=1.000 n=20) Int32N1000-32 10.35n ± 1% 10.31n ± 2% ~ (p=0.081 n=20) Int32N1e8-32 10.33n ± 1% 10.38n ± 1% ~ (p=0.335 n=20) Int32N1e9-32 10.35n ± 1% 10.37n ± 1% ~ (p=0.497 n=20) Int32N2e9-32 10.35n ± 1% 10.41n ± 1% ~ (p=0.605 n=20) Float32-32 13.57n ± 1% 13.78n ± 2% ~ (p=0.047 n=20) Float64-32 22.95n ± 4% 23.43n ± 3% ~ (p=0.218 n=20) ExpFloat64-32 15.23n ± 2% 15.46n ± 1% ~ (p=0.095 n=20) NormFloat64-32 13.78n ± 1% 13.73n ± 2% ~ (p=0.031 n=20) Perm3-32 46.62n ± 2% 47.46n ± 2% +1.82% (p=0.004 n=20) Perm30-32 400.7n ± 1% 403.5n ± 1% ~ (p=0.098 n=20) Perm30ViaShuffle-32 350.5n ± 1% 348.1n ± 2% ~ (p=0.703 n=20) ShuffleOverhead-32 326.0n ± 2% 326.2n ± 2% ~ (p=0.440 n=20) Concurrent-32 3.290n ± 0% 3.297n ± 4% ~ (p=0.189 n=20) For #61716. 
Change-Id: Id2a7e1c1db0beb81f563faaefba65fe292497269 Reviewed-on: https://go-review.googlesource.com/c/go/+/516859 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Filippo Valsorda <[email protected]> Reviewed-by: Heschi Kreinick <[email protected]>
1 parent 06145fe commit 6382893

File tree

11 files changed

+1479
-3
lines changed

11 files changed

+1479
-3
lines changed

api/next/61716.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pkg math/rand/v2, func Int64N(int64) int64 #61716
99
pkg math/rand/v2, func IntN(int) int #61716
1010
pkg math/rand/v2, func N[$0 intType]($0) $0 #61716
1111
pkg math/rand/v2, func New(Source) *Rand #61716
12+
pkg math/rand/v2, func NewChaCha8([32]uint8) *ChaCha8 #61716
1213
pkg math/rand/v2, func NewPCG(uint64, uint64) *PCG #61716
1314
pkg math/rand/v2, func NewZipf(*Rand, float64, float64, uint64) *Zipf #61716
1415
pkg math/rand/v2, func NormFloat64() float64 #61716
@@ -19,6 +20,10 @@ pkg math/rand/v2, func Uint32N(uint32) uint32 #61716
1920
pkg math/rand/v2, func Uint64() uint64 #61716
2021
pkg math/rand/v2, func Uint64N(uint64) uint64 #61716
2122
pkg math/rand/v2, func UintN(uint) uint #61716
23+
pkg math/rand/v2, method (*ChaCha8) MarshalBinary() ([]uint8, error) #61716
24+
pkg math/rand/v2, method (*ChaCha8) Seed([32]uint8) #61716
25+
pkg math/rand/v2, method (*ChaCha8) Uint64() uint64 #61716
26+
pkg math/rand/v2, method (*ChaCha8) UnmarshalBinary([]uint8) error #61716
2227
pkg math/rand/v2, method (*PCG) MarshalBinary() ([]uint8, error) #61716
2328
pkg math/rand/v2, method (*PCG) Seed(uint64, uint64) #61716
2429
pkg math/rand/v2, method (*PCG) Uint64() uint64 #61716
@@ -41,6 +46,7 @@ pkg math/rand/v2, method (*Rand) Uint64() uint64 #61716
4146
pkg math/rand/v2, method (*Rand) Uint64N(uint64) uint64 #61716
4247
pkg math/rand/v2, method (*Rand) UintN(uint) uint #61716
4348
pkg math/rand/v2, method (*Zipf) Uint64() uint64 #61716
49+
pkg math/rand/v2, type ChaCha8 struct #61716
4450
pkg math/rand/v2, type PCG struct #61716
4551
pkg math/rand/v2, type Rand struct #61716
4652
pkg math/rand/v2, type Source interface { Uint64 } #61716

src/go/build/deps_test.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,17 @@ var depsRules = `
5555
5656
unsafe < maps;
5757
58+
unsafe < internal/chacha8rand;
59+
5860
# RUNTIME is the core runtime group of packages, all of them very light-weight.
59-
internal/abi, internal/cpu, internal/goarch,
60-
internal/coverage/rtcov, internal/godebugs, internal/goexperiment,
61-
internal/goos, unsafe
61+
internal/abi,
62+
internal/chacha8rand,
63+
internal/coverage/rtcov,
64+
internal/cpu,
65+
internal/goarch,
66+
internal/godebugs,
67+
internal/goexperiment,
68+
internal/goos
6269
< internal/bytealg
6370
< internal/itoa
6471
< internal/unsafeheader

src/internal/chacha8rand/chacha8.go

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
// Copyright 2023 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Package chacha8rand implements a pseudorandom generator
6+
// based on ChaCha8. It is used by both runtime and math/rand/v2
7+
// and must have no dependencies.
8+
package chacha8rand
9+
10+
import "unsafe"
11+
12+
// Parameters of the buffering and reseeding schedule.
// One seed serves ctrMax/ctrInc = 4 calls to block, each filling a
// buffer of chunk = 32 values. The last reseed = 4 values of the final
// buffer are held back to become the next seed, so a single seed hands
// out (ctrMax/ctrInc)*chunk - reseed = 124 values.
const (
13+
ctrInc = 4 // amount added to the block counter before each refill
14+
ctrMax = 16 // counter value at which the seed is replaced and the counter restarts at 0
15+
chunk = 32 // number of uint64 values produced by one call to block
16+
reseed = 4 // number of trailing uint64 values reserved as the next seed
17+
)
18+
19+
// block is the chacha8rand block function.
// It is declared without a Go body, so the implementation lives outside
// this file (NOTE(review): presumably per-architecture assembly and/or a
// generic Go version in sibling files - confirm).
// Callers in this file pass the current seed, the 32-word output buffer
// to be overwritten, and the current block counter.
20+
func block(seed *[4]uint64, blocks *[32]uint64, counter uint32)
21+
22+
// A State holds the state for a single random generator.
23+
// It must be used from one goroutine at a time.
24+
// If used by multiple goroutines at a time, the goroutines
25+
// may see the same random values, but the code will not
26+
// crash or cause out-of-bounds memory accesses.
27+
type State struct {
28+
buf [32]uint64 // output of the most recent call to block
29+
seed [4]uint64 // seed passed to block; replaced from the tail of buf on reseed
30+
i uint32 // index in buf of the next value Next will return
31+
n uint32 // number of values in buf that Next may return (chunk, or chunk-reseed before a reseed)
32+
c uint32 // counter passed to block; advances by ctrInc and wraps to 0 at ctrMax
33+
}
34+
35+
// Next returns the next random value, along with a boolean
36+
// indicating whether one was available.
37+
// If one is not available, the caller should call Refill
38+
// and then repeat the call to Next.
39+
//
40+
// Next is //go:nosplit to allow its use in the runtime
41+
// with per-m data without holding the per-m lock.
42+
//go:nosplit
43+
func (s *State) Next() (uint64, bool) {
44+
i := s.i
45+
if i >= s.n {
46+
return 0, false
47+
}
48+
s.i = i + 1
49+
return s.buf[i&31], true // i&31 eliminates bounds check
50+
}
51+
52+
// Init seeds the State with the given seed value.
53+
func (s *State) Init(seed [32]byte) {
54+
s.Init64(*(*[4]uint64)(unsafe.Pointer(&seed)))
55+
}
56+
57+
// Init64 seeds the state with the given seed value.
58+
func (s *State) Init64(seed [4]uint64) {
59+
s.seed = seed
60+
block(&s.seed, &s.buf, 0)
61+
s.c = 0
62+
s.i = 0
63+
s.n = chunk
64+
}
65+
66+
// Refill refills the state with more random values.
67+
// After a call to Refill, an immediate call to Next will succeed
68+
// (unless multiple goroutines are incorrectly sharing a state).
69+
func (s *State) Refill() {
70+
s.c += ctrInc
71+
if s.c == ctrMax {
72+
// Reseed with generated uint64s for forward secrecy.
73+
// Normally this is done immediately after computing a block,
74+
// but we do it immediately before computing the next block,
75+
// to allow a much smaller serialized state (just the seed plus offset).
76+
// This gives a delayed benefit for the forward secrecy
77+
// (you can reconstruct the recent past given a memory dump),
78+
// which we deem acceptable in exchange for the reduced size.
79+
s.seed[0] = s.buf[len(s.buf)-reseed+0]
80+
s.seed[1] = s.buf[len(s.buf)-reseed+1]
81+
s.seed[2] = s.buf[len(s.buf)-reseed+2]
82+
s.seed[3] = s.buf[len(s.buf)-reseed+3]
83+
s.c = 0
84+
}
85+
block(&s.seed, &s.buf, s.c)
86+
s.i = 0
87+
s.n = uint32(len(s.buf))
88+
if s.c == ctrMax-ctrInc {
89+
s.n = uint32(len(s.buf)) - reseed
90+
}
91+
}
92+
93+
// Marshal marshals the state into a byte slice.
94+
// Marshal and Unmarshal are functions, not methods,
95+
// so that they will not be linked into the runtime
96+
// when it uses the State struct, since the runtime
97+
// does not need these.
98+
func Marshal(s *State) []byte {
99+
data := make([]byte, 6*8)
100+
copy(data, "chacha8:")
101+
used := (s.c/ctrInc)*chunk + s.i
102+
bePutUint64(data[1*8:], uint64(used))
103+
for i, seed := range s.seed {
104+
lePutUint64(data[(2+i)*8:], seed)
105+
}
106+
return data
107+
}
108+
109+
// errUnmarshalChaCha8 is the error type returned by Unmarshal
// when the input is not a valid encoding.
type errUnmarshalChaCha8 struct{}

// Error returns a fixed description of the failure.
func (*errUnmarshalChaCha8) Error() string {
	const msg = "invalid ChaCha8 encoding"
	return msg
}
114+
115+
// Unmarshal unmarshals the state from a byte slice.
116+
func Unmarshal(s *State, data []byte) error {
117+
if len(data) != 6*8 || string(data[:8]) != "chacha8:" {
118+
return new(errUnmarshalChaCha8)
119+
}
120+
used := beUint64(data[1*8:])
121+
if used > (ctrMax/ctrInc)*chunk-reseed {
122+
return new(errUnmarshalChaCha8)
123+
}
124+
for i := range s.seed {
125+
s.seed[i] = leUint64(data[(2+i)*8:])
126+
}
127+
s.c = ctrInc * (uint32(used) / chunk)
128+
block(&s.seed, &s.buf, s.c)
129+
s.i = uint32(used) % chunk
130+
s.n = chunk
131+
if s.c == ctrMax-ctrInc {
132+
s.n = chunk - reseed
133+
}
134+
return nil
135+
}
136+
137+
// beUint64 decodes the first 8 bytes of b as a big-endian uint64.
// It duplicates encoding/binary's big-endian Uint64 so that this
// package does not need to import it.
func beUint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	var v uint64
	for _, x := range b[:8] {
		// Most significant byte comes first.
		v = v<<8 | uint64(x)
	}
	return v
}
143+
144+
// bePutUint64 stores v into the first 8 bytes of b in big-endian order.
// It duplicates encoding/binary's big-endian PutUint64 so that this
// package does not need to import it.
func bePutUint64(b []byte, v uint64) {
	_ = b[7] // early bounds check to guarantee safety of writes below
	for i := 0; i < 8; i++ {
		// Byte i holds bits 63-8i down to 56-8i.
		b[i] = byte(v >> uint(56-8*i))
	}
}
156+
157+
// leUint64 decodes the first 8 bytes of b as a little-endian uint64.
// It duplicates encoding/binary's little-endian Uint64 so that this
// package does not need to import it.
func leUint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	var v uint64
	for i := 7; i >= 0; i-- {
		// Walk from the most significant byte (last) down to the first.
		v = v<<8 | uint64(b[i])
	}
	return v
}
163+
164+
// lePutUint64 stores v into the first 8 bytes of b in little-endian order.
// It duplicates encoding/binary's little-endian PutUint64 so that this
// package does not need to import it.
func lePutUint64(b []byte, v uint64) {
	_ = b[7] // early bounds check to guarantee safety of writes below
	for i := 0; i < 8; i++ {
		// Emit the lowest remaining byte, then shift it out.
		b[i] = byte(v)
		v >>= 8
	}
}

0 commit comments

Comments
 (0)