From 71d348c5e538173c503beffd5e6dd1c3013c45ea Mon Sep 17 00:00:00 2001 From: Mike Nix Date: Thu, 10 Oct 2019 21:54:39 +0800 Subject: [PATCH 1/6] precache() - preload code into the flash cache. By preloading code into the flash cache we can take control over when SPI Flash reads will occur when code is executing. This can be useful where the timing of a section of code is extremely critical and we don't want random pauses to pull code in from the SPI flash chip. It can also be useful for code that accesses/uses SPI0 which is connected to the flash chip. Non interrupt handler code that is infrequently called but might otherwise require being in valuable IRAM - such as bit-banging I/O code or some code run at bootup can avoid being permanently in IRAM. Macros are provided to make precaching one or more blocks of code in any function easy. --- cores/esp8266/core_esp8266_features.cpp | 39 +++++++++++++++++++++++++ cores/esp8266/core_esp8266_features.h | 13 +++++++++ 2 files changed, 52 insertions(+) create mode 100644 cores/esp8266/core_esp8266_features.cpp diff --git a/cores/esp8266/core_esp8266_features.cpp b/cores/esp8266/core_esp8266_features.cpp new file mode 100644 index 0000000000..bb5acd9e68 --- /dev/null +++ b/cores/esp8266/core_esp8266_features.cpp @@ -0,0 +1,39 @@ + +/* + core_esp8266_features.cpp + + Copyright (c) 2019 Mike Nix. All rights reserved. + This file is part of the esp8266 core for Arduino environment. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +/* precache() + * pre-loads flash data into the flash cache + * if f==0, preloads instructions starting at the address we were called from. + * otherwise preloads flash at the given address. + * All preloads are word aligned. + */ +void precache(void *f, uint32_t bytes) { + // Size of a cache page in words. We only need to read one word per + // page (ie 1 word in 8) for this to work. + #define CACHE_PAGE_SIZE (32/4) + + register uint32_t a0 asm("a0"); + volatile uint32_t *p = (uint32_t*)((f ? (uint32_t)f : a0) & ~0x03); + uint32_t x; + for (uint32_t i=0; i<=(bytes/4); i+=CACHE_PAGE_SIZE, p+=CACHE_PAGE_SIZE) x=*p; + (void)x; +} diff --git a/cores/esp8266/core_esp8266_features.h b/cores/esp8266/core_esp8266_features.h index 6b8e22c9b0..0a67048134 100644 --- a/cores/esp8266/core_esp8266_features.h +++ b/cores/esp8266/core_esp8266_features.h @@ -93,4 +93,17 @@ inline uint32_t esp_get_cycle_count() { } #endif // not CORE_MOCK + +// Tools for preloading code into the flash cache +#define PRECACHE_ATTR __attribute__((optimize("no-reorder-blocks"))) + +#define PRECACHE_START(tag) \ + precache(NULL,(uint8_t *)&&_precache_end_##tag - (uint8_t*)&&_precache_start_##tag); \ + _precache_start_##tag: + +#define PRECACHE_END(tag) \ + _precache_end_##tag: + +void precache(void *f, uint32_t bytes); + #endif // CORE_ESP8266_FEATURES_H From acde951ec38fa8dd6e3ecde01d2edcae77edc915 Mon Sep 17 00:00:00 2001 From: Mike Nix Date: Thu, 10 Oct 2019 22:25:20 +0800 Subject: [PATCH 2/6] Fix missing include --- cores/esp8266/core_esp8266_features.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cores/esp8266/core_esp8266_features.cpp b/cores/esp8266/core_esp8266_features.cpp index bb5acd9e68..9463e181a9 100644 --- a/cores/esp8266/core_esp8266_features.cpp +++ 
b/cores/esp8266/core_esp8266_features.cpp @@ -20,6 +20,8 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <stdint.h> + /* precache() * pre-loads flash data into the flash cache * if f==0, preloads instructions starting at the address we were called from. From e74736c2d4d08661ce7b9fd0ccdbabed3aa830fd Mon Sep 17 00:00:00 2001 From: Mike Nix Date: Fri, 11 Oct 2019 14:09:07 +0800 Subject: [PATCH 3/6] Make precache extern "C" --- cores/esp8266/core_esp8266_features.cpp | 2 +- cores/esp8266/core_esp8266_features.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cores/esp8266/core_esp8266_features.cpp b/cores/esp8266/core_esp8266_features.cpp index 9463e181a9..cdb189f947 100644 --- a/cores/esp8266/core_esp8266_features.cpp +++ b/cores/esp8266/core_esp8266_features.cpp @@ -28,7 +28,7 @@ * otherwise preloads flash at the given address. * All preloads are word aligned. */ -void precache(void *f, uint32_t bytes) { +extern "C" void precache(void *f, uint32_t bytes) { // Size of a cache page in words. We only need to read one word per // page (ie 1 word in 8) for this to work. 
#define CACHE_PAGE_SIZE (32/4) diff --git a/cores/esp8266/core_esp8266_features.h b/cores/esp8266/core_esp8266_features.h index 0a67048134..88b4e5fed8 100644 --- a/cores/esp8266/core_esp8266_features.h +++ b/cores/esp8266/core_esp8266_features.h @@ -104,6 +104,6 @@ inline uint32_t esp_get_cycle_count() { #define PRECACHE_END(tag) \ _precache_end_##tag: -void precache(void *f, uint32_t bytes); +extern "C" void precache(void *f, uint32_t bytes); #endif // CORE_ESP8266_FEATURES_H From 2cbdb413ebf55289238f2f395184ee3910106c59 Mon Sep 17 00:00:00 2001 From: Mike Nix Date: Fri, 11 Oct 2019 15:15:13 +0800 Subject: [PATCH 4/6] Attempt 2 at making precache extern "C" --- cores/esp8266/core_esp8266_features.cpp | 10 +++++++++- cores/esp8266/core_esp8266_features.h | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/cores/esp8266/core_esp8266_features.cpp b/cores/esp8266/core_esp8266_features.cpp index cdb189f947..4b058b27df 100644 --- a/cores/esp8266/core_esp8266_features.cpp +++ b/cores/esp8266/core_esp8266_features.cpp @@ -28,7 +28,11 @@ * otherwise preloads flash at the given address. * All preloads are word aligned. */ -extern "C" void precache(void *f, uint32_t bytes) { +#ifdef __cplusplus +extern "C" { +#endif + +void precache(void *f, uint32_t bytes) { // Size of a cache page in words. We only need to read one word per // page (ie 1 word in 8) for this to work. 
#define CACHE_PAGE_SIZE (32/4) @@ -39,3 +43,7 @@ extern "C" void precache(void *f, uint32_t bytes) { for (uint32_t i=0; i<=(bytes/4); i+=CACHE_PAGE_SIZE, p+=CACHE_PAGE_SIZE) x=*p; (void)x; } + +#ifdef __cplusplus +} +#endif diff --git a/cores/esp8266/core_esp8266_features.h b/cores/esp8266/core_esp8266_features.h index 88b4e5fed8..31b0dfa6e1 100644 --- a/cores/esp8266/core_esp8266_features.h +++ b/cores/esp8266/core_esp8266_features.h @@ -104,6 +104,14 @@ inline uint32_t esp_get_cycle_count() { #define PRECACHE_END(tag) \ _precache_end_##tag: -extern "C" void precache(void *f, uint32_t bytes); +#ifdef __cplusplus +extern "C" { +#endif + +void precache(void *f, uint32_t bytes); + +#ifdef __cplusplus +} +#endif #endif // CORE_ESP8266_FEATURES_H From 0a7862a258f685b13a49bce3a83f17fd28cc05ab Mon Sep 17 00:00:00 2001 From: Mike Nix Date: Fri, 11 Oct 2019 17:39:09 +0800 Subject: [PATCH 5/6] Fix calculation of number of cache lines to preload With certain alignments/lengths of code it was possible to not read enough into the flash cache. This commit makes the length calculation clearer and adds an extra cache line to ensure we precache enough code. --- cores/esp8266/core_esp8266_features.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cores/esp8266/core_esp8266_features.cpp b/cores/esp8266/core_esp8266_features.cpp index 4b058b27df..03396e5008 100644 --- a/cores/esp8266/core_esp8266_features.cpp +++ b/cores/esp8266/core_esp8266_features.cpp @@ -33,14 +33,15 @@ extern "C" { #endif void precache(void *f, uint32_t bytes) { - // Size of a cache page in words. We only need to read one word per + // Size of a cache page in bytes. We only need to read one word per // page (ie 1 word in 8) for this to work. - #define CACHE_PAGE_SIZE (32/4) + #define CACHE_PAGE_SIZE 32 register uint32_t a0 asm("a0"); + register uint32_t lines = (bytes/CACHE_PAGE_SIZE)+2; volatile uint32_t *p = (uint32_t*)((f ? 
(uint32_t)f : a0) & ~0x03); uint32_t x; - for (uint32_t i=0; i<=(bytes/4); i+=CACHE_PAGE_SIZE, p+=CACHE_PAGE_SIZE) x=*p; + for (uint32_t i=0; i<lines; i++, p+=CACHE_PAGE_SIZE/sizeof(uint32_t)) x=*p; (void)x; } From [commit hash lost in extraction] Mon Sep 17 00:00:00 2001 From: Mike Nix Date: Fri, 11 Oct 2019 20:29:57 +0800 Subject: [PATCH 6/6] Add noinline to PRECACHE_ATTR macro Precached code needs to be noinline to ensure the no-reorder-blocks is applied. --- cores/esp8266/core_esp8266_features.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cores/esp8266/core_esp8266_features.h b/cores/esp8266/core_esp8266_features.h index 31b0dfa6e1..1486e9c201 100644 --- a/cores/esp8266/core_esp8266_features.h +++ b/cores/esp8266/core_esp8266_features.h @@ -95,7 +95,8 @@ inline uint32_t esp_get_cycle_count() { // Tools for preloading code into the flash cache -#define PRECACHE_ATTR __attribute__((optimize("no-reorder-blocks"))) +#define PRECACHE_ATTR __attribute__((optimize("no-reorder-blocks"))) \ + __attribute__((noinline)) #define PRECACHE_START(tag) \ precache(NULL,(uint8_t *)&&_precache_end_##tag - (uint8_t*)&&_precache_start_##tag); \