2
2
// Use of this source code is governed by a BSD-style
3
3
// license that can be found in the LICENSE file.
4
4
5
- // Package ChaCha20 implements the core ChaCha20 function as specified
6
- // in https://tools.ietf.org/html/rfc7539#section-2.3 .
5
+ // Package chacha20 implements the ChaCha20 encryption algorithm
6
+ // as specified in RFC 8439 .
7
7
package chacha20
8
8
9
9
import (
10
10
"crypto/cipher"
11
11
"encoding/binary"
12
+ "math/bits"
12
13
13
14
"golang.org/x/crypto/internal/subtle"
14
15
)
15
16
16
- // assert that *Cipher implements cipher.Stream
17
- var _ cipher.Stream = (* Cipher )(nil )
18
-
19
17
// Cipher is a stateful instance of ChaCha20 using a particular key
20
18
// and nonce. A *Cipher implements the cipher.Stream interface.
21
19
type Cipher struct {
20
+ // The ChaCha20 state is 16 words: 4 constant, 8 of key, 1 of counter
21
+ // (incremented after each block), and 3 of nonce.
22
22
key [8 ]uint32
23
- counter uint32 // incremented after each block
23
+ counter uint32
24
24
nonce [3 ]uint32
25
- buf [bufSize ]byte // buffer for unused keystream bytes
26
- len int // number of unused keystream bytes at end of buf
25
+
26
+ // The last len bytes of buf are leftover key stream bytes from the previous
27
+ // XORKeyStream invocation. The size of buf depends on how many blocks are
28
+ // computed at a time.
29
+ buf [bufSize ]byte
30
+ len int
27
31
}
28
32
33
+ var _ cipher.Stream = (* Cipher )(nil )
34
+
29
35
// New creates a new ChaCha20 stream cipher with the given key and nonce.
30
36
// The initial counter value is set to 0.
31
37
func New (key [8 ]uint32 , nonce [3 ]uint32 ) * Cipher {
32
38
return & Cipher {key : key , nonce : nonce }
33
39
}
34
40
35
// The constant first 4 words of the ChaCha20 state. Read as little-endian
// bytes they spell the ASCII string "expand 32-byte k".
const (
	j0 uint32 = 0x61707865 // expa
	j1 uint32 = 0x3320646e // nd 3
	j2 uint32 = 0x79622d32 // 2-by
	j3 uint32 = 0x6b206574 // te k
)

// blockSize is the size in bytes of one ChaCha20 key stream block
// (16 state words of 4 bytes each).
const blockSize = 64
50
+
51
// quarterRound is the core of ChaCha20. It shuffles the bits of 4 state words.
// It's executed 4 times for each of the 20 ChaCha20 rounds, operating on all 16
// words each round, in columnar or diagonal groups of 4 at a time.
//
// It takes the four input words a, b, c, d and returns their updated values in
// the same order. Each of the four steps is an add, an xor and a left rotation
// by 16, 12, 8 and 7 bits respectively; every rotation is applied exactly once.
func quarterRound(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
	a += b
	d ^= a
	d = bits.RotateLeft32(d, 16)
	c += d
	b ^= c
	b = bits.RotateLeft32(b, 12)
	a += b
	d ^= a
	d = bits.RotateLeft32(d, 8)
	c += d
	b ^= c
	b = bits.RotateLeft32(b, 7)
	return a, b, c, d
}
58
69
@@ -67,116 +78,141 @@ func quarterRound(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
67
78
// the src buffers was passed in a single run. That is, Cipher
68
79
// maintains state and does not reset at each XORKeyStream call.
69
80
func (s * Cipher ) XORKeyStream (dst , src []byte ) {
81
+ if len (src ) == 0 {
82
+ return
83
+ }
70
84
if len (dst ) < len (src ) {
71
85
panic ("chacha20: output smaller than input" )
72
86
}
73
- if subtle .InexactOverlap (dst [:len (src )], src ) {
87
+ dst = dst [:len (src )]
88
+ if subtle .InexactOverlap (dst , src ) {
74
89
panic ("chacha20: invalid buffer overlap" )
75
90
}
76
91
77
- // xor src with buffered keystream first
92
+ // First, drain any remaining key stream from a previous XORKeyStream.
78
93
if s .len != 0 {
79
- buf := s .buf [len (s .buf )- s .len :]
80
- if len (src ) < len (buf ) {
81
- buf = buf [:len (src )]
82
- }
83
- td , ts := dst [:len (buf )], src [:len (buf )] // BCE hint
84
- for i , b := range buf {
85
- td [i ] = ts [i ] ^ b
94
+ keyStream := s .buf [bufSize - s .len :]
95
+ if len (src ) < len (keyStream ) {
96
+ keyStream = keyStream [:len (src )]
86
97
}
87
- s . len -= len (buf )
88
- if s . len != 0 {
89
- return
98
+ _ = src [ len (keyStream ) - 1 ] // bounds check elimination hint
99
+ for i , b := range keyStream {
100
+ dst [ i ] = src [ i ] ^ b
90
101
}
91
- s .buf = [ len (s . buf )] byte {} // zero the empty buffer
92
- src = src [len (buf ):]
93
- dst = dst [len (buf ):]
102
+ s .len -= len (keyStream )
103
+ src = src [len (keyStream ):]
104
+ dst = dst [len (keyStream ):]
94
105
}
95
106
96
- if len (src ) == 0 {
97
- return
107
+ const blocksPerBuf = bufSize / blockSize
108
+ numBufs := (uint64 (len (src )) + bufSize - 1 ) / bufSize
109
+ if uint64 (s .counter )+ numBufs * blocksPerBuf >= 1 << 32 {
110
+ panic ("chacha20: counter overflow" )
98
111
}
99
- if haveAsm {
100
- if uint64 (len (src ))+ uint64 (s .counter )* 64 > (1 << 38 )- 64 {
101
- panic ("chacha20: counter overflow" )
102
- }
103
- s .xorKeyStreamAsm (dst , src )
104
- return
112
+
113
+ // xorKeyStreamBlocks implementations expect input lengths that are a
114
+ // multiple of bufSize. Platform-specific ones process multiple blocks at a
115
+ // time, so have bufSizes that are a multiple of blockSize.
116
+
117
+ rem := len (src ) % bufSize
118
+ full := len (src ) - rem
119
+
120
+ if full > 0 {
121
+ s .xorKeyStreamBlocks (dst [:full ], src [:full ])
105
122
}
106
123
107
- // set up a 64-byte buffer to pad out the final block if needed
108
- // (hoisted out of the main loop to avoid spills)
109
- rem := len (src ) % 64 // length of final block
110
- fin := len (src ) - rem // index of final block
124
+ // If we have a partial (multi-)block, pad it for xorKeyStreamBlocks, and
125
+ // keep the leftover keystream for the next XORKeyStream invocation.
111
126
if rem > 0 {
112
- copy (s .buf [len (s .buf )- 64 :], src [fin :])
127
+ s .buf = [bufSize ]byte {}
128
+ copy (s .buf [:], src [full :])
129
+ s .xorKeyStreamBlocks (s .buf [:], s .buf [:])
130
+ s .len = bufSize - copy (dst [full :], s .buf [:])
131
+ }
132
+ }
133
+
134
+ func (s * Cipher ) xorKeyStreamBlocksGeneric (dst , src []byte ) {
135
+ if len (dst ) != len (src ) || len (dst )% blockSize != 0 {
136
+ panic ("chacha20: internal error: wrong dst and/or src length" )
113
137
}
114
138
115
- // pre-calculate most of the first round
116
- s1 , s5 , s9 , s13 := quarterRound (j1 , s .key [1 ], s .key [5 ], s .nonce [0 ])
117
- s2 , s6 , s10 , s14 := quarterRound (j2 , s .key [2 ], s .key [6 ], s .nonce [1 ])
118
- s3 , s7 , s11 , s15 := quarterRound (j3 , s .key [3 ], s .key [7 ], s .nonce [2 ])
139
+ // To generate each block of key stream, the initial cipher state
140
+ // (represented below) is passed through 20 rounds of shuffling,
141
+ // alternately applying quarterRounds by columns (like 1, 5, 9, 13)
142
+ // or by diagonals (like 1, 6, 11, 12).
143
+ //
144
+ // 0:cccccccc 1:cccccccc 2:cccccccc 3:cccccccc
145
+ // 4:kkkkkkkk 5:kkkkkkkk 6:kkkkkkkk 7:kkkkkkkk
146
+ // 8:kkkkkkkk 9:kkkkkkkk 10:kkkkkkkk 11:kkkkkkkk
147
+ // 12:bbbbbbbb 13:nnnnnnnn 14:nnnnnnnn 15:nnnnnnnn
148
+ //
149
+ // c=constant k=key b=blockcount n=nonce
150
+ var (
151
+ c0 , c1 , c2 , c3 = j0 , j1 , j2 , j3
152
+ c4 , c5 , c6 , c7 = s .key [0 ], s .key [1 ], s .key [2 ], s .key [3 ]
153
+ c8 , c9 , c10 , c11 = s .key [4 ], s .key [5 ], s .key [6 ], s .key [7 ]
154
+ _ , c13 , c14 , c15 = s .counter , s .nonce [0 ], s .nonce [1 ], s .nonce [2 ]
155
+ )
119
156
120
- n := len (src )
121
- src , dst = src [:n :n ], dst [:n :n ] // BCE hint
122
- for i := 0 ; i < n ; i += 64 {
123
- // calculate the remainder of the first round
124
- s0 , s4 , s8 , s12 := quarterRound (j0 , s .key [0 ], s .key [4 ], s .counter )
157
+ // Three quarters of the first round don't depend on the counter, so we can
158
+ // calculate them here, and reuse them for multiple blocks in the loop.
159
+ // TODO(filippo): experiment with reusing across XORKeyStream calls.
160
+ s1 , s5 , s9 , s13 := quarterRound (c1 , c5 , c9 , c13 )
161
+ s2 , s6 , s10 , s14 := quarterRound (c2 , c6 , c10 , c14 )
162
+ s3 , s7 , s11 , s15 := quarterRound (c3 , c7 , c11 , c15 )
125
163
126
- // execute the second round
164
+ for i := 0 ; i < len (src ); i += blockSize {
165
+ // The remainder of the first column round.
166
+ s0 , s4 , s8 , s12 := quarterRound (c0 , c4 , c8 , s .counter )
167
+
168
+ // The second diagonal round.
127
169
x0 , x5 , x10 , x15 := quarterRound (s0 , s5 , s10 , s15 )
128
170
x1 , x6 , x11 , x12 := quarterRound (s1 , s6 , s11 , s12 )
129
171
x2 , x7 , x8 , x13 := quarterRound (s2 , s7 , s8 , s13 )
130
172
x3 , x4 , x9 , x14 := quarterRound (s3 , s4 , s9 , s14 )
131
173
132
- // execute the remaining 18 rounds
174
+ // The remaining 18 rounds.
133
175
for i := 0 ; i < 9 ; i ++ {
176
+ // Column round.
134
177
x0 , x4 , x8 , x12 = quarterRound (x0 , x4 , x8 , x12 )
135
178
x1 , x5 , x9 , x13 = quarterRound (x1 , x5 , x9 , x13 )
136
179
x2 , x6 , x10 , x14 = quarterRound (x2 , x6 , x10 , x14 )
137
180
x3 , x7 , x11 , x15 = quarterRound (x3 , x7 , x11 , x15 )
138
181
182
+ // Diagonal round.
139
183
x0 , x5 , x10 , x15 = quarterRound (x0 , x5 , x10 , x15 )
140
184
x1 , x6 , x11 , x12 = quarterRound (x1 , x6 , x11 , x12 )
141
185
x2 , x7 , x8 , x13 = quarterRound (x2 , x7 , x8 , x13 )
142
186
x3 , x4 , x9 , x14 = quarterRound (x3 , x4 , x9 , x14 )
143
187
}
144
188
145
- x0 += j0
146
- x1 += j1
147
- x2 += j2
148
- x3 += j3
149
-
150
- x4 += s .key [0 ]
151
- x5 += s .key [1 ]
152
- x6 += s .key [2 ]
153
- x7 += s .key [3 ]
154
- x8 += s .key [4 ]
155
- x9 += s .key [5 ]
156
- x10 += s .key [6 ]
157
- x11 += s .key [7 ]
158
-
189
+ // Finally, add back the initial state to generate the key stream.
190
+ x0 += c0
191
+ x1 += c1
192
+ x2 += c2
193
+ x3 += c3
194
+ x4 += c4
195
+ x5 += c5
196
+ x6 += c6
197
+ x7 += c7
198
+ x8 += c8
199
+ x9 += c9
200
+ x10 += c10
201
+ x11 += c11
159
202
x12 += s .counter
160
- x13 += s . nonce [ 0 ]
161
- x14 += s . nonce [ 1 ]
162
- x15 += s . nonce [ 2 ]
203
+ x13 += c13
204
+ x14 += c14
205
+ x15 += c15
163
206
164
- // increment the counter
165
207
s .counter += 1
166
208
if s .counter == 0 {
167
- panic ("chacha20: counter overflow" )
209
+ panic ("chacha20: internal error: counter overflow" )
168
210
}
169
211
170
- // pad to 64 bytes if needed
171
212
in , out := src [i :], dst [i :]
172
- if i == fin {
173
- // src[fin:] has already been copied into s.buf before
174
- // the main loop
175
- in , out = s .buf [len (s .buf )- 64 :], s .buf [len (s .buf )- 64 :]
176
- }
177
- in , out = in [:64 ], out [:64 ] // BCE hint
213
+ in , out = in [:blockSize ], out [:blockSize ] // bounds check elimination hint
178
214
179
- // XOR the key stream with the source and write out the result
215
+ // XOR the key stream with the source and write out the result.
180
216
xor (out [0 :], in [0 :], x0 )
181
217
xor (out [4 :], in [4 :], x1 )
182
218
xor (out [8 :], in [8 :], x2 )
@@ -194,22 +230,13 @@ func (s *Cipher) XORKeyStream(dst, src []byte) {
194
230
xor (out [56 :], in [56 :], x14 )
195
231
xor (out [60 :], in [60 :], x15 )
196
232
}
197
- // copy any trailing bytes out of the buffer and into dst
198
- if rem != 0 {
199
- s .len = 64 - rem
200
- copy (dst [fin :], s .buf [len (s .buf )- 64 :])
201
- }
202
233
}
203
234
204
235
// Advance discards bytes in the key stream until the next 64 byte block
205
- // boundary is reached and updates the counter accordingly. If the key
206
- // stream is already at a block boundary no bytes will be discarded and
207
- // the counter will be unchanged.
236
+ // boundary is reached. If the key stream is already at a block boundary no
237
+ // bytes will be discarded.
208
238
func (s * Cipher ) Advance () {
209
- s .len -= s .len % 64
210
- if s .len == 0 {
211
- s .buf = [len (s .buf )]byte {}
212
- }
239
+ s .len -= s .len % blockSize
213
240
}
214
241
215
242
// XORKeyStream crypts bytes from in to out using the given key and counters.
@@ -246,11 +273,13 @@ func HChaCha20(key *[8]uint32, nonce *[4]uint32) [8]uint32 {
246
273
x12 , x13 , x14 , x15 := nonce [0 ], nonce [1 ], nonce [2 ], nonce [3 ]
247
274
248
275
for i := 0 ; i < 10 ; i ++ {
276
+ // Column round.
249
277
x0 , x4 , x8 , x12 = quarterRound (x0 , x4 , x8 , x12 )
250
278
x1 , x5 , x9 , x13 = quarterRound (x1 , x5 , x9 , x13 )
251
279
x2 , x6 , x10 , x14 = quarterRound (x2 , x6 , x10 , x14 )
252
280
x3 , x7 , x11 , x15 = quarterRound (x3 , x7 , x11 , x15 )
253
281
282
+ // Diagonal round.
254
283
x0 , x5 , x10 , x15 = quarterRound (x0 , x5 , x10 , x15 )
255
284
x1 , x6 , x11 , x12 = quarterRound (x1 , x6 , x11 , x12 )
256
285
x2 , x7 , x8 , x13 = quarterRound (x2 , x7 , x8 , x13 )
0 commit comments