-
-
Notifications
You must be signed in to change notification settings - Fork 1k
Expand file tree
/
Copy pathrp2040-boot-stage2.S
More file actions
471 lines (390 loc) · 16.2 KB
/
rp2040-boot-stage2.S
File metadata and controls
471 lines (390 loc) · 16.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
//
// Implementation of RP2040 stage 2 boot loader. This code is derived from the
// Winbond W25Q080 implementation (as found in the Pico) in the official Pico SDK.
//
// This implementation has been made 'stand-alone' by including necessary code /
// symbols from the included files in the reference implementation directly into
// the source. It has also been modified to include the conditional logic from
// the CircuitPython implementation that supports additional flash chips. The
// CircuitPython source is here:
// https://github.com/adafruit/circuitpython/blob/main/ports/raspberrypi/stage2.c.jinja
//
// This file cannot be assembled directly, instead assemble the board-specific file
// (such as pico-boot-stage2.S) which defines the parameters specific to the flash
// chip included on that board.
//
// Care has been taken to preserve ordering and it has been verified the generated
// binary is byte-for-byte identical to the reference code binary when assembled for
// the Pico.
//
// Note: the stage 2 boot loader must be 256 bytes in length and have a checksum
// present. In TinyGo, the linker script is responsible for allocating 256 bytes
// for the .boot2 section and the build logic patches the checksum into the
// binary after linking, controlled by the '<target>.json' flag 'rp2040-boot-patch'.
//
// The stage 2 bootstrap section can be inspected in an elf file using this command:
// objdump -s -j .boot2 <binary>.elf
//
// Original Source:
// https://github.com/raspberrypi/pico-sdk/blob/master/src/rp2_common/boot_stage2/boot2_w25q080.S
//
// ----------------------------------------------------------------------------
// Second stage boot code
// Copyright (c) 2019-2021 Raspberry Pi (Trading) Ltd.
// SPDX-License-Identifier: BSD-3-Clause
//
// Device: Winbond W25Q080
// Also supports W25Q16JV (which has some different SR instructions)
// Also supports AT25SF081
// Also supports S25FL132K0
//
// Description: Configures W25Q080 to run in Quad I/O continuous read XIP mode
//
// Details: * Check status register 2 to determine if QSPI mode is enabled,
// and perform an SR2 programming cycle if necessary.
// * Use SSI to perform a dummy 0xEB read command, with the mode
// continuation bits set, so that the flash will not require
// 0xEB instruction prefix on subsequent reads.
// * Configure SSI to write address, mode bits, but no instruction.
// SSI + flash are now jointly in a state where continuous reads
// can take place.
// * Jump to exit pointer passed in via lr. Bootrom passes null,
// in which case this code uses a default 256 byte flash offset
//
// Building: * This code must be position-independent, and use stack only
// * The code will be padded to a size of 256 bytes, including a
// 4-byte checksum. Therefore code size cannot exceed 252 bytes.
// ----------------------------------------------------------------------------
//
// Expanded include files
//
#define CMD_WRITE_ENABLE 0x06
#define CMD_READ_STATUS1 0x05
#define CMD_READ_STATUS2 0x35
#define CMD_WRITE_STATUS1 0x01
#define CMD_WRITE_STATUS2 0x31
#define SREG_DATA 0x02 // Enable quad-SPI mode
#define XIP_BASE 0x10000000
#define XIP_SSI_BASE 0x18000000
#define PADS_QSPI_BASE 0x40020000
#define PPB_BASE 0xe0000000
#define M0PLUS_VTOR_OFFSET 0x0000ed08
#define PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB 4
#define PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS 0x00000001
#define PADS_QSPI_GPIO_QSPI_SCLK_OFFSET 0x00000004
#define PADS_QSPI_GPIO_QSPI_SD0_OFFSET 0x00000008
#define PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS 0x00000002
#define PADS_QSPI_GPIO_QSPI_SD1_OFFSET 0x0000000c
#define PADS_QSPI_GPIO_QSPI_SD2_OFFSET 0x00000010
#define PADS_QSPI_GPIO_QSPI_SD3_OFFSET 0x00000014
#define SSI_CTRLR0_OFFSET 0x00000000
#define SSI_CTRLR1_OFFSET 0x00000004
#define SSI_SSIENR_OFFSET 0x00000008
#define SSI_BAUDR_OFFSET 0x00000014
#define SSI_SR_OFFSET 0x00000028
#define SSI_DR0_OFFSET 0x00000060
#define SSI_RX_SAMPLE_DLY_OFFSET 0x000000f0
#define SSI_CTRLR0_DFS_32_LSB 16
#define SSI_CTRLR0_SPI_FRF_VALUE_QUAD 0x2
#define SSI_CTRLR0_SPI_FRF_LSB 21
#define SSI_CTRLR0_TMOD_VALUE_TX_AND_RX 0x0
#define SSI_CTRLR0_TMOD_VALUE_EEPROM_READ 0x3
#define SSI_CTRLR0_TMOD_LSB 8
#define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A 0x1
#define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A 0x2
#define SSI_SPI_CTRLR0_OFFSET 0x000000f4
#define SSI_SPI_CTRLR0_INST_L_VALUE_NONE 0x0
#define SSI_SPI_CTRLR0_INST_L_VALUE_8B 0x2
#define SSI_SPI_CTRLR0_TRANS_TYPE_LSB 0
#define SSI_SPI_CTRLR0_ADDR_L_LSB 2
#define SSI_SPI_CTRLR0_INST_L_LSB 8
#define SSI_SPI_CTRLR0_WAIT_CYCLES_LSB 11
#define SSI_SPI_CTRLR0_XIP_CMD_LSB 24
#define SSI_SR_BUSY_BITS 0x00000001
#define SSI_SR_TFE_BITS 0x00000004
// ----------------------------------------------------------------------------
// Config section
// ----------------------------------------------------------------------------
// It should be possible to support most flash devices by modifying this section
// The serial flash interface will run at clk_sys/PICO_FLASH_SPI_CLKDIV.
// This must be a positive, even integer.
// The bootrom is very conservative with SPI frequency, but here we should be
// as aggressive as possible.
#define PICO_FLASH_SPI_CLKDIV BOARD_PICO_FLASH_SPI_CLKDIV
#if PICO_FLASH_SPI_CLKDIV & 1
#error PICO_FLASH_SPI_CLKDIV must be even
#endif
#if BOARD_QUAD_OK==1
// Define interface width: single/dual/quad IO
#define FRAME_FORMAT SSI_CTRLR0_SPI_FRF_VALUE_QUAD
#define TRANSACTION_TYPE SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A
// Note that the INST_L field is used to select what XIP data gets pushed into
// the TX FIFO:
// INST_L_0_BITS {ADDR[23:0],XIP_CMD[7:0]} Load "mode bits" into XIP_CMD
// Anything else {XIP_CMD[7:0],ADDR[23:0]} Load SPI command into XIP_CMD
#define INSTRUCTION_LENGTH SSI_SPI_CTRLR0_INST_L_VALUE_NONE
#define READ_INSTRUCTION MODE_CONTINUOUS_READ
#define ADDR_L 8 // 6 for address, 2 for mode
#else
#define FRAME_FORMAT SSI_CTRLR0_SPI_FRF_VALUE_STD
#define TRANSACTION_TYPE SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C1A
#define INSTRUCTION_LENGTH SSI_SPI_CTRLR0_INST_L_VALUE_8B
#define READ_INSTRUCTION BOARD_CMD_READ
#define ADDR_L 6 // * 4 = 24
#endif
// The flash-chip specific read isntruction
#define CMD_READ BOARD_CMD_READ
// "Mode bits" are 8 special bits sent immediately after
// the address bits in a "Read Data Fast Quad I/O" command sequence.
// On W25Q080, the four LSBs are don't care, and if MSBs == 0xa, the
// next read does not require the 0xeb instruction prefix.
#define MODE_CONTINUOUS_READ 0xa0
// How many clocks of Hi-Z following the mode bits. For W25Q080, 4 dummy cycles
// are required.
#define WAIT_CYCLES BOARD_WAIT_CYCLES
// If defined, we will read status reg, compare to SREG_DATA, and overwrite
// with our value if the SR doesn't match.
// We do a two-byte write to SR1 (01h cmd) rather than a one-byte write to
// SR2 (31h cmd) as the latter command isn't supported by WX25Q080.
// This isn't great because it will remove block protections.
// A better solution is to use a volatile SR write if your device supports it.
#define PROGRAM_STATUS_REG
.syntax unified
.cpu cortex-m0plus
.thumb
.section .boot2, "ax"
// The exit point is passed in lr. If entered from bootrom, this will be the
// flash address immediately following this second stage (0x10000100).
// Otherwise it will be a return address -- second stage being called as a
// function by user code, after copying out of XIP region. r3 holds SSI base,
// r0...2 used as temporaries. Other GPRs not used.
.global _stage2_boot
.type _stage2_boot,%function
.thumb_func
_stage2_boot:
push {lr}
// Set pad configuration:
// - SCLK 8mA drive, no slew limiting
// - SDx disable input Schmitt to reduce delay
ldr r3, =PADS_QSPI_BASE
movs r0, #(2 << PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB | PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS)
str r0, [r3, #PADS_QSPI_GPIO_QSPI_SCLK_OFFSET]
ldr r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]
movs r1, #PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS
bics r0, r1
#if BOARD_QUAD_OK==1
str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]
#endif
str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD1_OFFSET]
#if BOARD_QUAD_OK==1
str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD2_OFFSET]
str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD3_OFFSET]
#endif
ldr r3, =XIP_SSI_BASE
// Disable SSI to allow further config
movs r1, #0
str r1, [r3, #SSI_SSIENR_OFFSET]
// Set baud rate
movs r1, #PICO_FLASH_SPI_CLKDIV
str r1, [r3, #SSI_BAUDR_OFFSET]
// Set 1-cycle sample delay. If PICO_FLASH_SPI_CLKDIV == 2 then this means,
// if the flash launches data on SCLK posedge, we capture it at the time that
// the next SCLK posedge is launched. This is shortly before that posedge
// arrives at the flash, so data hold time should be ok. For
// PICO_FLASH_SPI_CLKDIV > 2 this pretty much has no effect.
movs r1, #1
movs r2, #SSI_RX_SAMPLE_DLY_OFFSET // == 0xf0 so need 8 bits of offset significance
str r1, [r3, r2]
// On QSPI parts we usually need a 01h SR-write command to enable QSPI mode
// (i.e. turn WPn and HOLDn into IO2/IO3)
#ifdef PROGRAM_STATUS_REG
program_sregs:
#define CTRL0_SPI_TXRX \
(7 << SSI_CTRLR0_DFS_32_LSB) | /* 8 bits per data frame */ \
(SSI_CTRLR0_TMOD_VALUE_TX_AND_RX << SSI_CTRLR0_TMOD_LSB)
ldr r1, =(CTRL0_SPI_TXRX)
str r1, [r3, #SSI_CTRLR0_OFFSET]
// Enable SSI and select slave 0
movs r1, #1
str r1, [r3, #SSI_SSIENR_OFFSET]
// Check whether SR needs updating
#if BOARD_QUAD_OK==1
# if BOARD_QUAD_ENABLE_STATUS_BYTE==1
movs r0, #CMD_READ_STATUS1
# elif BOARD_QUAD_ENABLE_STATUS_BYTE==2
movs r0, #CMD_READ_STATUS2
# endif
bl read_flash_sreg
movs r2, #BOARD_QUAD_ENABLE_BIT_MASK
cmp r0, r2
beq skip_sreg_programming
// Send write enable command
movs r1, #CMD_WRITE_ENABLE
str r1, [r3, #SSI_DR0_OFFSET]
// Poll for completion and discard RX
bl wait_ssi_ready
ldr r1, [r3, #SSI_DR0_OFFSET]
// Send status write command followed by data bytes
# if BOARD_SPLIT_STATUS_WRITE==1
# if BOARD_QUAD_ENABLE_STATUS_BYTE==1
movs r1, #CMD_WRITE_STATUS1
# elif BOARD_QUAD_ENABLE_STATUS_BYTE==2
movs r1, #CMD_WRITE_STATUS2
# endif
str r1, [r3, #SSI_DR0_OFFSET]
str r2, [r3, #SSI_DR0_OFFSET]
bl wait_ssi_ready
//ldr r1, [r3, #SSI_DR0_OFFSET]
ldr r1, [r3, #SSI_DR0_OFFSET]
ldr r1, [r3, #SSI_DR0_OFFSET]
# else
movs r1, #CMD_WRITE_STATUS1
str r1, [r3, #SSI_DR0_OFFSET]
# if BOARD_QUAD_ENABLE_STATUS_BYTE==2
movs r0, #0
str r0, [r3, #SSI_DR0_OFFSET]
# endif
str r2, [r3, #SSI_DR0_OFFSET]
bl wait_ssi_ready
ldr r1, [r3, #SSI_DR0_OFFSET]
ldr r1, [r3, #SSI_DR0_OFFSET]
# if BOARD_QUAD_ENABLE_STATUS_BYTE==2
ldr r1, [r3, #SSI_DR0_OFFSET]
# endif
# endif
// Poll status register for write completion
1:
movs r0, #CMD_READ_STATUS1
bl read_flash_sreg
movs r1, #1
tst r0, r1
bne 1b
#endif
skip_sreg_programming:
// Disable SSI again so that it can be reconfigured
movs r1, #0
str r1, [r3, #SSI_SSIENR_OFFSET]
#endif
// Currently the flash expects an 8 bit serial command prefix on every
// transfer, which is a waste of cycles. Perform a dummy Fast Read Quad I/O
// command, with mode bits set such that the flash will not expect a serial
// command prefix on *subsequent* transfers. We don't care about the results
// of the read, the important part is the mode bits.
dummy_read:
#define CTRLR0_ENTER_XIP \
(FRAME_FORMAT /* Quad I/O mode */ \
<< SSI_CTRLR0_SPI_FRF_LSB) | \
(31 << SSI_CTRLR0_DFS_32_LSB) | /* 32 data bits */ \
(SSI_CTRLR0_TMOD_VALUE_EEPROM_READ /* Send INST/ADDR, Receive Data */ \
<< SSI_CTRLR0_TMOD_LSB)
ldr r1, =(CTRLR0_ENTER_XIP)
str r1, [r3, #SSI_CTRLR0_OFFSET]
movs r1, #0x0 // NDF=0 (single 32b read)
str r1, [r3, #SSI_CTRLR1_OFFSET]
#if BOARD_QUAD_OK==1
#define SPI_CTRLR0_ENTER_XIP \
(ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | /* Address + mode bits */ \
(WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \
(SSI_SPI_CTRLR0_INST_L_VALUE_8B \
<< SSI_SPI_CTRLR0_INST_L_LSB) | /* 8-bit instruction */ \
(SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A /* Send Command in serial mode then address in Quad I/O mode */ \
<< SSI_SPI_CTRLR0_TRANS_TYPE_LSB)
ldr r1, =(SPI_CTRLR0_ENTER_XIP)
ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET) // SPI_CTRL0 Register
str r1, [r0]
movs r1, #1 // Re-enable SSI
str r1, [r3, #SSI_SSIENR_OFFSET]
movs r1, #CMD_READ
str r1, [r3, #SSI_DR0_OFFSET] // Push SPI command into TX FIFO
movs r1, #MODE_CONTINUOUS_READ // 32-bit: 24 address bits (we don't care, so 0) and M[7:4]=1010
str r1, [r3, #SSI_DR0_OFFSET] // Push Address into TX FIFO - this will trigger the transaction
// Poll for completion
bl wait_ssi_ready
// The flash is in a state where we can blast addresses in parallel, and get
// parallel data back. Now configure the SSI to translate XIP bus accesses
// into QSPI transfers of this form.
movs r1, #0
str r1, [r3, #SSI_SSIENR_OFFSET] // Disable SSI (and clear FIFO) to allow further config
#endif
// Note that the INST_L field is used to select what XIP data gets pushed into
// the TX FIFO:
// INST_L_0_BITS {ADDR[23:0],XIP_CMD[7:0]} Load "mode bits" into XIP_CMD
// Anything else {XIP_CMD[7:0],ADDR[23:0]} Load SPI command into XIP_CMD
configure_ssi:
#define SPI_CTRLR0_XIP \
(READ_INSTRUCTION /* Mode bits to keep flash in continuous read mode */ \
<< SSI_SPI_CTRLR0_XIP_CMD_LSB) | \
(ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | /* Total number of address + mode bits */ \
(WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \
(INSTRUCTION_LENGTH /* Do not send a command, instead send XIP_CMD as mode bits after address */ \
<< SSI_SPI_CTRLR0_INST_L_LSB) | \
(TRANSACTION_TYPE /* Send Address in Quad I/O mode (and Command but that is zero bits long) */ \
<< SSI_SPI_CTRLR0_TRANS_TYPE_LSB)
ldr r1, =(SPI_CTRLR0_XIP)
ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET)
str r1, [r0]
movs r1, #1
str r1, [r3, #SSI_SSIENR_OFFSET] // Re-enable SSI
// Bus accesses to the XIP window will now be transparently serviced by the
// external flash on cache miss. We are ready to run code from flash.
//
// Helper Includes
//
//
// #include "boot2_helpers/exit_from_boot2.S"
//
// If entered from the bootrom, lr (which we earlier pushed) will be 0,
// and we vector through the table at the start of the main flash image.
// Any regular function call will have a nonzero value for lr.
check_return:
pop {r0}
cmp r0, #0
beq vector_into_flash
bx r0
vector_into_flash:
ldr r0, =(XIP_BASE + 0x100)
ldr r1, =(PPB_BASE + M0PLUS_VTOR_OFFSET)
str r0, [r1]
ldmia r0, {r0, r1}
msr msp, r0
bx r1
//
// #include "boot2_helpers/wait_ssi_ready.S"
//
wait_ssi_ready:
push {r0, r1, lr}
// Command is complete when there is nothing left to send
// (TX FIFO empty) and SSI is no longer busy (CSn deasserted)
1:
ldr r1, [r3, #SSI_SR_OFFSET]
movs r0, #SSI_SR_TFE_BITS
tst r1, r0
beq 1b
movs r0, #SSI_SR_BUSY_BITS
tst r1, r0
bne 1b
pop {r0, r1, pc}
#ifdef PROGRAM_STATUS_REG
//
// #include "boot2_helpers/read_flash_sreg.S"
//
// Pass status read cmd into r0.
// Returns status value in r0.
.global read_flash_sreg
.type read_flash_sreg,%function
.thumb_func
read_flash_sreg:
push {r1, lr}
str r0, [r3, #SSI_DR0_OFFSET]
// Dummy byte:
str r0, [r3, #SSI_DR0_OFFSET]
bl wait_ssi_ready
// Discard first byte and combine the next two
ldr r0, [r3, #SSI_DR0_OFFSET]
ldr r0, [r3, #SSI_DR0_OFFSET]
pop {r1, pc}
#endif
.global literals
literals:
.ltorg
.end