From 0af9c5eb15bf4004ead1581e0fa1af4cb63d8e9b Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Mon, 8 Jul 2019 14:42:41 -0700 Subject: [PATCH 01/17] Correct critical section with interrupt level preserving and nest support alternative. Replace ets_intr_lock()/ets_intr_unlock() with uint32_t oldValue=xt_rsil(3)/xt_wrs(oldValue). Added UMM_CRITICAL_DECL macro to define storage for current state. Expanded UMM_CRITICAL_... to use unique identifiers. This helpt facilitate gather function specific timing information. Replace printf with something that is ROM or IRAM based so that a printf that occurs during an ISR malloc/new does not cause a crash. To avoid any reentry issue it should also avoid doing malloc lib calls. Refactor realloc to avoid memcpy/memmove while in critical section. This is only effective when realloc is called with interrupts enabled. The copy process alone can take over 10us (when copying more than ~498 bytes with a 80MHz CPU clock). It would be good practice for an ISR to avoid realloc. Note, while doing this might initially sound scary, this appears to be very stable. It ran on my troublesome sketch for over 3 weeks until I got back from vacation and flashed an update. Troublesome sketch - runs ESPAsyncTCP, with modified fauxmo emulation for 10 devices. It receives lost of Network traffic related to uPnP scans, which includes lots of TCP connects disconnects RSTs related to uPnP discovery. I have clocked umm_info critical lock time taking as much as 180us. A common use for the umm_info call is to get the free heap result. It is common to try and closely monitor free heap as a method to detect memory leaks. This may result in frequent calls to umm_info. There has not been a clear test case that shows an issue yet; however, I and others think they are or have had crashes related to this. I have added code that adjusts the running free heap number from _umm_malloc, _umm_realloc, and _umm_free. Removing the need to do a long interrupts disabled calculation via _umm_info. Build optional, min/max time measurements for locks held while in info, malloc, realloc, and free. Also, maintain a count of how many times each is called with INTLEVEL set. --- cores/esp8266/umm_malloc/umm_malloc.cpp | 213 ++++++++++++++++++++-- cores/esp8266/umm_malloc/umm_malloc_cfg.h | 89 ++++++++- 2 files changed, 280 insertions(+), 22 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 09cfbdda71..8ab283c5ef 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -498,9 +498,30 @@ #include "umm_malloc.h" #include "umm_malloc_cfg.h" /* user-dependent */ +#if defined(DEBUG_ESP_PORT) +#include +#endif extern "C" { +#ifdef UMM_CRITICAL_PERIOD_ANALYZE +static struct _UMM_TIME_STATS time_stats = { + {0xFFFFFFFF, 0U, 0U, 0U}, + {0xFFFFFFFF, 0U, 0U, 0U}, + {0xFFFFFFFF, 0U, 0U, 0U}, + {0xFFFFFFFF, 0U, 0U, 0U} }; + +bool ICACHE_FLASH_ATTR get_umm_get_perf_data(struct _UMM_TIME_STATS *p, size_t size) { + if (p && size != 0) { + uint32_t save_ps = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL); + memcpy(p, &time_stats, size); + xt_wsr_ps(save_ps); + return true; + } + return false; +} +#endif + // From UMM, the last caller of a malloc/realloc/calloc which failed: extern void *umm_last_fail_alloc_addr; extern int umm_last_fail_alloc_size; @@ -519,8 +540,92 @@ extern int umm_last_fail_alloc_size; # define DBG_LOG_LEVEL DBG_LOG_LEVEL #endif +/* +Changes: + + Correct critical section with interrupt level preserving and nest support + alternative. Replace ets_intr_lock()/ets_intr_unlock() with uint32_t + oldValue=xt_rsil(3)/xt_wrs(oldValue). Added UMM_CRITICAL_DECL macro to define + storage for current state. Expanded UMM_CRITICAL_... to use unique + identifiers. This helpt facilitate gather function specific timing + information. + + Replace printf with something that is ROM or IRAM based so that a printf + that occurs during an ISR malloc/new does not cause a crash. To avoid any + reentry issue it should also avoid doing malloc lib calls. + + Refactor realloc to avoid memcpy/memmove while in critical section. This is + only effective when realloc is called with interrupts enabled. The copy + process alone can take over 10us (when copying more than ~498 bytes with a + 80MHz CPU clock). It would be good practice for an ISR to avoid realloc. + Note, while doing this might initially sound scary, this appears to be very + stable. It ran on my troublesome sketch for over 3 weeks until I got back from + vacation and flashed an update. Troublesome sketch - runs ESPAsyncTCP, with + modified fauxmo emulation for 10 devices. It receives lost of Network traffic + related to uPnP scans, which includes lots of TCP connects disconnects RSTs + related to uPnP discovery. + + I have clocked umm_info critical lock time taking as much as 180us. A common + use for the umm_info call is to get the free heap result. It is common + to try and closely monitor free heap as a method to detect memory leaks. + This may result in frequent calls to umm_info. There has not been a clear + test case that shows an issue yet; however, I and others think they are or + have had crashes related to this. + + I have added code that adjusts the running free heap number from _umm_malloc, + _umm_realloc, and _umm_free. Removing the need to do a long interrupts + disabled calculation via _umm_info. + + Build optional, min/max time measurements for locks held while in info, + malloc, realloc, and free. Also, maintain a count of how many times each is + called with INTLEVEL set. + + */ + +#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) +#define DEBUG_IRAM_ATTR ICACHE_RAM_ATTR +/* + Printing from the malloc routines is tricky. Since a lot of library calls + will want to do malloc. + + Objective: To be able to print "last gasp" diagnostic messages + when interrupts are disabled and w/o availability of heap resources. +*/ + +// ROM _putc1, ignores CRs and sends CR/LF for LF, newline. +// Always returns character sent. +int constexpr (*_rom_putc1)(int) = (int (*)(int))0x40001dcc; +void uart_buff_switch(uint8_t); + +int DEBUG_IRAM_ATTR _isr_safe_printf_P(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +int DEBUG_IRAM_ATTR _isr_safe_printf_P(const char *fmt, ...) { +#ifdef DEBUG_ESP_PORT +#define VALUE(x) __STRINGIFY(x) + // Preprocessor and compiler together will optimize away the if. + if (strcmp("Serial1", VALUE(DEBUG_ESP_PORT)) == 0) { + uart_buff_switch(1U); + } else { + uart_buff_switch(0U); + } +#else + uart_buff_switch(0U); // Side effect, clears RX FIFO +#endif + size_t str_len = ets_strlen(fmt); + size_t buf_len = (str_len + 1 + 3) & ~0x03U; + char ram_buf[buf_len] __attribute__ ((aligned(4))); + ets_memcpy(ram_buf, fmt, buf_len); + va_list argPtr; + va_start(argPtr, fmt); + int result = ets_vprintf(_rom_putc1, ram_buf, argPtr); + va_end(argPtr); + return result; +} + +#define printf(fmt, ...) _isr_safe_printf_P(PSTR(fmt), ##__VA_ARGS__) +#else // Macro to place constant strings into PROGMEM and print them properly #define printf(fmt, ...) printf(PSTR(fmt), ## __VA_ARGS__ ) +#endif /* -- dbglog {{{ */ @@ -646,6 +751,10 @@ unsigned short int umm_numblocks = 0; #define UMM_PFREE(b) (UMM_BLOCK(b).body.free.prev) #define UMM_DATA(b) (UMM_BLOCK(b).body.data) +/* + * This does not look safe, no access locks. It currently is not being + * built, so not an immediate issue. - mjh 061019 + */ /* integrity check (UMM_INTEGRITY_CHECK) {{{ */ #if defined(UMM_INTEGRITY_CHECK) /* @@ -980,8 +1089,10 @@ static void *get_unpoisoned( void *vptr ) { */ UMM_HEAP_INFO ummHeapInfo; +unsigned short int ummHeapFreeBlocks; void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { + UMM_CRITICAL_DECL(id_info); unsigned short int blockNo = 0; @@ -990,7 +1101,7 @@ void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { } /* Protect the critical section... */ - UMM_CRITICAL_ENTRY(); + UMM_CRITICAL_ENTRY(id_info); /* * Clear out all of the entries in the ummHeapInfo structure before doing @@ -1051,7 +1162,7 @@ void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { if( ptr == &UMM_BLOCK(blockNo) ) { /* Release the critical section... */ - UMM_CRITICAL_EXIT(); + UMM_CRITICAL_EXIT(id_info); return( ptr ); } @@ -1084,6 +1195,10 @@ void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { ummHeapInfo.maxFreeContiguousBlocks = curBlocks; } } +#if defined(DEBUG_ESP_PORT) + // This is a good place to verify we are calculating correctly + assert(ummHeapFreeBlocks == ummHeapInfo.freeBlocks); +#endif DBG_LOG_FORCE( force, "|0x%08lx|B %5d|NB %5d|PB %5d|Z %5d|NF %5d|PF %5d|\n", (unsigned long)(&UMM_BLOCK(blockNo)), @@ -1105,7 +1220,7 @@ void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { ummHeapInfo.freeBlocks ); /* Release the critical section... */ - UMM_CRITICAL_EXIT(); + UMM_CRITICAL_EXIT(id_info); return( NULL ); } @@ -1223,6 +1338,9 @@ void ICACHE_FLASH_ATTR umm_init( void ) { /* index of the latest `umm_block` */ const unsigned short int block_last = UMM_NUMBLOCKS - 1; + /* init heapFreeBlocks */ + ummHeapFreeBlocks = block_last; + /* setup the 0th `umm_block`, which just points to the 1st */ UMM_NBLOCK(block_0th) = block_1th; UMM_NFREE(block_0th) = block_1th; @@ -1266,6 +1384,7 @@ void ICACHE_FLASH_ATTR umm_init( void ) { /* ------------------------------------------------------------------------ */ static void _umm_free( void *ptr ) { + UMM_CRITICAL_DECL(id_free); unsigned short int c; @@ -1286,15 +1405,18 @@ static void _umm_free( void *ptr ) { * on the free list! */ - /* Protect the critical section... */ - UMM_CRITICAL_ENTRY(); - /* Figure out which block we're in. Note the use of truncated division... */ c = (((char *)ptr)-(char *)(&(umm_heap[0])))/sizeof(umm_block); + /* Protect the critical section... */ + UMM_CRITICAL_ENTRY(id_free); + DBG_LOG_DEBUG( "Freeing block %6d\n", c ); + /* Update dynamic Free Block count */ + ummHeapFreeBlocks += (UMM_NBLOCK(c) - c); + /* Now let's assimilate this block with the next one if possible. */ umm_assimilate_up( c ); @@ -1346,12 +1468,14 @@ static void _umm_free( void *ptr ) { #endif /* Release the critical section... */ - UMM_CRITICAL_EXIT(); + UMM_CRITICAL_EXIT(id_free); } /* ------------------------------------------------------------------------ */ static void *_umm_malloc( size_t size ) { + UMM_CRITICAL_DECL(id_malloc); + unsigned short int blocks; unsigned short int blockSize = 0; @@ -1377,9 +1501,6 @@ static void *_umm_malloc( size_t size ) { return( (void *)NULL ); } - /* Protect the critical section... */ - UMM_CRITICAL_ENTRY(); - blocks = umm_blocks( size ); /* @@ -1390,6 +1511,9 @@ static void *_umm_malloc( size_t size ) { * algorithm */ + /* Protect the critical section... */ + UMM_CRITICAL_ENTRY(id_malloc); + cf = UMM_NFREE(0); bestBlock = UMM_NFREE(0); @@ -1462,19 +1586,23 @@ static void *_umm_malloc( size_t size ) { UMM_PFREE( UMM_NFREE(cf) ) = cf + blocks; UMM_NFREE( cf + blocks ) = UMM_NFREE(cf); } + + /* Update dynamic Free Block count */ + ummHeapFreeBlocks -= blocks; + } else { /* Out of memory */ DBG_LOG_DEBUG( "Can't allocate %5d blocks\n", blocks ); /* Release the critical section... */ - UMM_CRITICAL_EXIT(); + UMM_CRITICAL_EXIT(id_malloc); return( (void *)NULL ); } /* Release the critical section... */ - UMM_CRITICAL_EXIT(); + UMM_CRITICAL_EXIT(id_malloc); return( (void *)&UMM_DATA(cf) ); } @@ -1482,6 +1610,7 @@ static void *_umm_malloc( size_t size ) { /* ------------------------------------------------------------------------ */ static void *_umm_realloc( void *ptr, size_t size ) { + UMM_CRITICAL_DECL(id_realloc); unsigned short int blocks; unsigned short int blockSize; @@ -1522,8 +1651,30 @@ static void *_umm_realloc( void *ptr, size_t size ) { return( (void *)NULL ); } - /* Protect the critical section... */ - UMM_CRITICAL_ENTRY(); + /* + * Defer starting critical section. + * + * Initially we should be safe without a critical section as long as we are + * referencing values that are within our allocation as constants. + * And only reference values that will not change, while the redefintions of + * the allocations around us change. + * + * Example UMM_PBLOCK() could be change by a call to malloc from an ISR. + * On the other hand UMM_NBLOCK() is safe returns an address of the next + * block. The calculation is all based on information within our allocation + * that remains constant, until we change it. + * + * As long as we don't try to modify the next block or walk the chain of + * blocks we are okay. + * + * When called by an "interrupts enabled" type caller, it bears the + * responsibility to not call again, with the allocate we are currently + * working on. I think this is a normal expectation. I could be wrong. + * Such a situation would involve a function that is called from foreground + * and ISR context. Such code would already have to be re-entrant. This + * change may expand the corner cases for such a function. + * + */ /* * Otherwise we need to actually do a reallocation. A naiive approach @@ -1559,12 +1710,12 @@ static void *_umm_realloc( void *ptr, size_t size ) { DBG_LOG_DEBUG( "realloc the same size block - %d, do nothing\n", blocks ); - /* Release the critical section... */ - UMM_CRITICAL_EXIT(); - return( ptr ); } + /* Now we need a critical section... */ + UMM_CRITICAL_ENTRY(id_realloc); + /* * Now we have a block size that could be bigger or smaller. Either * way, try to assimilate up to the next block before doing anything... @@ -1600,6 +1751,13 @@ static void *_umm_realloc( void *ptr, size_t size ) { c = umm_assimilate_down(c, 0); + /* + * For the ESP8266 interrupts should not be off for more than 10us. + * An unprotect/protect around memmove should be safe to do here. + * All variables used are on the stack. + */ + UMM_CRITICAL_EXIT(id_realloc); + /* * Move the bytes down to the new block we just created, but be sure to move * only the original bytes. @@ -1610,12 +1768,19 @@ static void *_umm_realloc( void *ptr, size_t size ) { /* And don't forget to adjust the pointer to the new block location! */ ptr = (void *)&UMM_DATA(c); + + UMM_CRITICAL_ENTRY(id_realloc); } + unsigned short int previousBlockSize = blockSize; + /* Now calculate the block size again...and we'll have three cases */ blockSize = (UMM_NBLOCK(c) - c); + /* Update dynamic Free Block count */ + ummHeapFreeBlocks -= (blockSize - previousBlockSize); + if( blockSize == blocks ) { /* This space intentionally left blank - return the original pointer! */ @@ -1630,7 +1795,13 @@ static void *_umm_realloc( void *ptr, size_t size ) { DBG_LOG_DEBUG( "realloc %d to a smaller block %d, shrink and free the leftover bits\n", blockSize, blocks ); umm_make_new_block( c, blocks, 0, 0 ); + + UMM_CRITICAL_EXIT(id_realloc); + _umm_free( (void *)&UMM_DATA(c+blocks) ); + + return( ptr ); + } else { /* New block is bigger than the old block... */ @@ -1643,15 +1814,18 @@ static void *_umm_realloc( void *ptr, size_t size ) { * free up the old block, but only if the malloc was sucessful! */ + UMM_CRITICAL_EXIT(id_realloc); + if( (ptr = _umm_malloc( size )) ) { memcpy( ptr, oldptr, curSize ); _umm_free( oldptr ); } + return( ptr ); } /* Release the critical section... */ - UMM_CRITICAL_EXIT(); + UMM_CRITICAL_EXIT(id_realloc); return( ptr ); } @@ -1766,8 +1940,7 @@ void umm_free( void *ptr ) { /* ------------------------------------------------------------------------ */ size_t ICACHE_FLASH_ATTR umm_free_heap_size( void ) { - umm_info(NULL, 0); - return (size_t)ummHeapInfo.freeBlocks * sizeof(umm_block); + return (size_t)ummHeapFreeBlocks * sizeof(umm_block); } size_t ICACHE_FLASH_ATTR umm_max_block_size( void ) { diff --git a/cores/esp8266/umm_malloc/umm_malloc_cfg.h b/cores/esp8266/umm_malloc/umm_malloc_cfg.h index e7c573c877..4c066c8aae 100644 --- a/cores/esp8266/umm_malloc/umm_malloc_cfg.h +++ b/cores/esp8266/umm_malloc/umm_malloc_cfg.h @@ -110,8 +110,93 @@ extern char _heap_start; * called from within umm_malloc() */ -#define UMM_CRITICAL_ENTRY() ets_intr_lock() -#define UMM_CRITICAL_EXIT() ets_intr_unlock() +/* + Per Devyte, the core currently doesn't support masking a specific interrupt + level. That doesn't mean it can't be implemented, only that at this time + locking is implemented as all or nothing. + https://github.com/esp8266/Arduino/issues/6246#issuecomment-508612609 + + So for now we default to all, 15. + */ +#ifndef DEFAULT_CRITICAL_SECTION_INTLEVEL +#define DEFAULT_CRITICAL_SECTION_INTLEVEL 15 +#endif + +#define UMM_CRITICAL_PERIOD_ANALYZE + +#ifndef __STRINGIFY +#define __STRINGIFY(a) #a +#endif +/* + Copy paste xt_rsil and xt_wsr_ps from Arduino.h + */ +#ifndef xt_rsil +#define xt_rsil(level) (__extension__({uint32_t state; __asm__ __volatile__("rsil %0," __STRINGIFY(level) : "=a" (state)); state;})) +#endif +#ifndef xt_wsr_ps +#define xt_wsr_ps(state) __asm__ __volatile__("wsr %0,ps; isync" :: "a" (state) : "memory") +#endif + +#if !defined(UMM_CRITICAL_PERIOD_ANALYZE) +// This method preserves the intlevel on entry and restores the +// original intlevel at exit. +#define UMM_CRITICAL_DECL(tag) uint32_t _saved_ps_##tag +#define UMM_CRITICAL_ENTRY(tag) _saved_ps_##tag = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL) +#define UMM_CRITICAL_EXIT(tag) xt_wsr_ps(_saved_ps_##tag) + +#else +// This option adds support for gathering time locked data +typedef struct _TIME_STAT { + uint32_t min; + uint32_t max; + uint32_t start; + uint32_t intlevel; +} time_stat_t; + +struct _UMM_TIME_STATS { + time_stat_t id_malloc; + time_stat_t id_realloc; + time_stat_t id_free; + time_stat_t id_info; +}; + +bool get_umm_get_perf_data(struct _UMM_TIME_STATS *p, size_t size); + +static inline ICACHE_RAM_ATTR uint32_t GetCycleCount() { + uint32_t ccount; + // Not sure esync is needed before "rsr %0,CCOUNT". I don't see it in + // Espressf SDK or Xtensa clock.S file. + // __asm__ __volatile__("esync; rsr %0,ccount":"=a"(ccount)::"memory"); + __asm__ __volatile__("rsr %0,ccount":"=a"(ccount)::"memory"); + return ccount; +} + +static inline void _critical_entry(time_stat_t *p, uint32_t *saved_ps) { + *saved_ps = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL); + if (0U != (*saved_ps & 0x0FU)) { + p->intlevel += 1U; + // inflight_stack_trace(*saved_ps); + } + + p->start = GetCycleCount(); +} + +static inline void _critical_exit(time_stat_t *p, uint32_t *saved_ps) { + uint32_t elapse = GetCycleCount() - p->start; + if (elapse < p->min) + p->min = elapse; + + if (elapse > p->max) + p->max = elapse; + + xt_wsr_ps(*saved_ps); +} + +#define UMM_CRITICAL_DECL(tag) uint32_t _saved_ps_##tag +#define UMM_CRITICAL_ENTRY(tag) _critical_entry(&time_stats.tag, &_saved_ps_##tag) +#define UMM_CRITICAL_EXIT(tag) _critical_exit(&time_stats.tag, &_saved_ps_##tag) + +#endif /* * -D UMM_INTEGRITY_CHECK : From dc8fe13330636e10eb56c4ae56e6330506bdef17 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Mon, 8 Jul 2019 19:30:28 -0700 Subject: [PATCH 02/17] Fixed. travis build complaint. --- cores/esp8266/umm_malloc/umm_malloc_cfg.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc_cfg.h b/cores/esp8266/umm_malloc/umm_malloc_cfg.h index 4c066c8aae..97d2439c83 100644 --- a/cores/esp8266/umm_malloc/umm_malloc_cfg.h +++ b/cores/esp8266/umm_malloc/umm_malloc_cfg.h @@ -162,7 +162,7 @@ struct _UMM_TIME_STATS { bool get_umm_get_perf_data(struct _UMM_TIME_STATS *p, size_t size); -static inline ICACHE_RAM_ATTR uint32_t GetCycleCount() { +static inline ICACHE_RAM_ATTR uint32_t _umm_get_cycle_count() { uint32_t ccount; // Not sure esync is needed before "rsr %0,CCOUNT". I don't see it in // Espressf SDK or Xtensa clock.S file. @@ -178,11 +178,11 @@ static inline void _critical_entry(time_stat_t *p, uint32_t *saved_ps) { // inflight_stack_trace(*saved_ps); } - p->start = GetCycleCount(); + p->start = _umm_get_cycle_count(); } static inline void _critical_exit(time_stat_t *p, uint32_t *saved_ps) { - uint32_t elapse = GetCycleCount() - p->start; + uint32_t elapse = _umm_get_cycle_count() - p->start; if (elapse < p->min) p->min = elapse; From 7b0a0c8d7d3d0140ced106df254a189154efc892 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Tue, 9 Jul 2019 10:18:50 -0700 Subject: [PATCH 03/17] Changes for https://github.com/esp8266/Arduino/pull/6274#pullrequestreview-259579883 --- cores/esp8266/umm_malloc/umm_malloc.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 8ab283c5ef..7d8757f925 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -1409,11 +1409,11 @@ static void _umm_free( void *ptr ) { c = (((char *)ptr)-(char *)(&(umm_heap[0])))/sizeof(umm_block); + DBG_LOG_DEBUG( "Freeing block %6d\n", c ); + /* Protect the critical section... */ UMM_CRITICAL_ENTRY(id_free); - DBG_LOG_DEBUG( "Freeing block %6d\n", c ); - /* Update dynamic Free Block count */ ummHeapFreeBlocks += (UMM_NBLOCK(c) - c); @@ -1591,13 +1591,13 @@ static void *_umm_malloc( size_t size ) { ummHeapFreeBlocks -= blocks; } else { + /* Release the critical section... */ + UMM_CRITICAL_EXIT(id_malloc); + /* Out of memory */ DBG_LOG_DEBUG( "Can't allocate %5d blocks\n", blocks ); - /* Release the critical section... */ - UMM_CRITICAL_EXIT(id_malloc); - return( (void *)NULL ); } From 472b1b45186b289039ac92b968f18f9e34a79767 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Tue, 9 Jul 2019 17:05:00 -0700 Subject: [PATCH 04/17] Added requested comment and missing comment for UMM_CRITICAL_PERIOD_ANALYZE. --- cores/esp8266/umm_malloc/umm_malloc.cpp | 9 ++++++++- cores/esp8266/umm_malloc/umm_malloc_cfg.h | 12 ++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 7d8757f925..548ef5e113 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -585,7 +585,7 @@ extern int umm_last_fail_alloc_size; #if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) #define DEBUG_IRAM_ATTR ICACHE_RAM_ATTR /* - Printing from the malloc routines is tricky. Since a lot of library calls + Printing from the malloc routines is tricky. Since a lot of library calls will want to do malloc. Objective: To be able to print "last gasp" diagnostic messages @@ -610,6 +610,13 @@ int DEBUG_IRAM_ATTR _isr_safe_printf_P(const char *fmt, ...) { #else uart_buff_switch(0U); // Side effect, clears RX FIFO #endif + /* + To use ets_strlen() and ets_memcpy() safely with PROGMEM, flash storage, + the PROGMEM address must be word (4 bytes) aligned. The destination + address for ets_memcpy must also be word-aligned. We also round the + buf_len up to the nearest word boundary. So that all transfers will be + whole words. + */ size_t str_len = ets_strlen(fmt); size_t buf_len = (str_len + 1 + 3) & ~0x03U; char ram_buf[buf_len] __attribute__ ((aligned(4))); diff --git a/cores/esp8266/umm_malloc/umm_malloc_cfg.h b/cores/esp8266/umm_malloc/umm_malloc_cfg.h index 97d2439c83..1c47aef5ad 100644 --- a/cores/esp8266/umm_malloc/umm_malloc_cfg.h +++ b/cores/esp8266/umm_malloc/umm_malloc_cfg.h @@ -122,7 +122,16 @@ extern char _heap_start; #define DEFAULT_CRITICAL_SECTION_INTLEVEL 15 #endif -#define UMM_CRITICAL_PERIOD_ANALYZE +/* + * -D UMM_CRITICAL_PERIOD_ANALYZE : + * + * Build option to collect timing usage data on critical section usage in + * functions: info, malloc, realloc. Collects MIN, MAX, and number of time + * IRQs were disabled at request time. Note, IRQs disabled value can be + * inflated by calls to realloc. realloc may call malloc and/or free. + * Examine code for specifics on what info is available and how to access. +*/ +// #define UMM_CRITICAL_PERIOD_ANALYZE #ifndef __STRINGIFY #define __STRINGIFY(a) #a @@ -175,7 +184,6 @@ static inline void _critical_entry(time_stat_t *p, uint32_t *saved_ps) { *saved_ps = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL); if (0U != (*saved_ps & 0x0FU)) { p->intlevel += 1U; - // inflight_stack_trace(*saved_ps); } p->start = _umm_get_cycle_count(); From 1de2ec4ac1d6b296cb9394f41c1676b58b42716c Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Wed, 10 Jul 2019 09:07:24 -0700 Subject: [PATCH 05/17] Updated comments and update xt_rsil() --- cores/esp8266/umm_malloc/umm_malloc.cpp | 13 ++++++++----- cores/esp8266/umm_malloc/umm_malloc_cfg.h | 6 +++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 548ef5e113..27a30f2fc3 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -541,13 +541,13 @@ extern int umm_last_fail_alloc_size; #endif /* -Changes: +Changes for July 2019: Correct critical section with interrupt level preserving and nest support alternative. Replace ets_intr_lock()/ets_intr_unlock() with uint32_t oldValue=xt_rsil(3)/xt_wrs(oldValue). Added UMM_CRITICAL_DECL macro to define storage for current state. Expanded UMM_CRITICAL_... to use unique - identifiers. This helpt facilitate gather function specific timing + identifiers. This helpt facilitate gather function specific timing information. Replace printf with something that is ROM or IRAM based so that a printf @@ -560,11 +560,14 @@ extern int umm_last_fail_alloc_size; 80MHz CPU clock). It would be good practice for an ISR to avoid realloc. Note, while doing this might initially sound scary, this appears to be very stable. It ran on my troublesome sketch for over 3 weeks until I got back from - vacation and flashed an update. Troublesome sketch - runs ESPAsyncTCP, with + vacation and flashed an update. Troublesome sketch - runs ESPAsyncTCP, with modified fauxmo emulation for 10 devices. It receives lost of Network traffic related to uPnP scans, which includes lots of TCP connects disconnects RSTs related to uPnP discovery. + Locking is no longer nested in realloc, due to refactoring for reduced IRQ + off time. + I have clocked umm_info critical lock time taking as much as 180us. A common use for the umm_info call is to get the free heap result. It is common to try and closely monitor free heap as a method to detect memory leaks. @@ -574,10 +577,10 @@ extern int umm_last_fail_alloc_size; I have added code that adjusts the running free heap number from _umm_malloc, _umm_realloc, and _umm_free. Removing the need to do a long interrupts - disabled calculation via _umm_info. + disabled calculation via umm_info. Build optional, min/max time measurements for locks held while in info, - malloc, realloc, and free. Also, maintain a count of how many times each is + malloc, realloc, and free. Also, maintains a count of how many times each is called with INTLEVEL set. */ diff --git a/cores/esp8266/umm_malloc/umm_malloc_cfg.h b/cores/esp8266/umm_malloc/umm_malloc_cfg.h index 1c47aef5ad..42c848f268 100644 --- a/cores/esp8266/umm_malloc/umm_malloc_cfg.h +++ b/cores/esp8266/umm_malloc/umm_malloc_cfg.h @@ -127,8 +127,8 @@ extern char _heap_start; * * Build option to collect timing usage data on critical section usage in * functions: info, malloc, realloc. Collects MIN, MAX, and number of time - * IRQs were disabled at request time. Note, IRQs disabled value can be - * inflated by calls to realloc. realloc may call malloc and/or free. + * IRQs were disabled at request time. Note, for realloc MAX disabled time + * will not include the time from calling malloc and/or free. * Examine code for specifics on what info is available and how to access. */ // #define UMM_CRITICAL_PERIOD_ANALYZE @@ -140,7 +140,7 @@ extern char _heap_start; Copy paste xt_rsil and xt_wsr_ps from Arduino.h */ #ifndef xt_rsil -#define xt_rsil(level) (__extension__({uint32_t state; __asm__ __volatile__("rsil %0," __STRINGIFY(level) : "=a" (state)); state;})) +#define xt_rsil(level) (__extension__({uint32_t state; __asm__ __volatile__("rsil %0," __STRINGIFY(level) : "=a" (state) :: "memory"); state;})) #endif #ifndef xt_wsr_ps #define xt_wsr_ps(state) __asm__ __volatile__("wsr %0,ps; isync" :: "a" (state) : "memory") From 08cc09e6ac746218942eb265fcb8fc94e785c554 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Tue, 16 Jul 2019 09:01:19 -0700 Subject: [PATCH 06/17] Moved xt_rsil&co (pulled in __STRINGIFY) definitions out of Arduino.h, to cores/esp8266/core_esp8266_features.h Added esp_get_cycle_count() to core_esp8266_features.h. Updated umm_malloc and Esp.h to use new defines and location. --- cores/esp8266/Arduino.h | 21 ------------------ cores/esp8266/Esp.h | 4 +--- cores/esp8266/core_esp8266_features.h | 25 ++++++++++++++++++++- cores/esp8266/umm_malloc/umm_malloc_cfg.h | 27 +++-------------------- 4 files changed, 28 insertions(+), 49 deletions(-) diff --git a/cores/esp8266/Arduino.h b/cores/esp8266/Arduino.h index d962923de8..a8b86c278a 100644 --- a/cores/esp8266/Arduino.h +++ b/cores/esp8266/Arduino.h @@ -142,30 +142,9 @@ void timer0_detachInterrupt(void); void ets_intr_lock(); void ets_intr_unlock(); -#ifndef __STRINGIFY -#define __STRINGIFY(a) #a -#endif - -// these low level routines provide a replacement for SREG interrupt save that AVR uses -// but are esp8266 specific. A normal use pattern is like -// -//{ -// uint32_t savedPS = xt_rsil(1); // this routine will allow level 2 and above -// // do work here -// xt_wsr_ps(savedPS); // restore the state -//} -// -// level (0-15), interrupts of the given level and above will be active -// level 15 will disable ALL interrupts, -// level 0 will enable ALL interrupts, -// -#define xt_rsil(level) (__extension__({uint32_t state; __asm__ __volatile__("rsil %0," __STRINGIFY(level) : "=a" (state) :: "memory"); state;})) -#define xt_wsr_ps(state) __asm__ __volatile__("wsr %0,ps; isync" :: "a" (state) : "memory") - #define interrupts() xt_rsil(0) #define noInterrupts() xt_rsil(15) - #define clockCyclesPerMicrosecond() ( F_CPU / 1000000L ) #define clockCyclesToMicroseconds(a) ( (a) / clockCyclesPerMicrosecond() ) #define microsecondsToClockCycles(a) ( (a) * clockCyclesPerMicrosecond() ) diff --git a/cores/esp8266/Esp.h b/cores/esp8266/Esp.h index bc241eb392..46af2a8cb1 100644 --- a/cores/esp8266/Esp.h +++ b/cores/esp8266/Esp.h @@ -209,9 +209,7 @@ class EspClass { #ifndef CORE_MOCK uint32_t EspClass::getCycleCount() { - uint32_t ccount; - __asm__ __volatile__("esync; rsr %0,ccount":"=a" (ccount)); - return ccount; + return esp_get_cycle_count(); } #endif // !defined(CORE_MOCK) diff --git a/cores/esp8266/core_esp8266_features.h b/cores/esp8266/core_esp8266_features.h index aff8fa4de9..f2531509b9 100644 --- a/cores/esp8266/core_esp8266_features.h +++ b/cores/esp8266/core_esp8266_features.h @@ -33,7 +33,30 @@ #define WIFI_HAS_EVENT_CALLBACK +#ifndef __STRINGIFY +#define __STRINGIFY(a) #a +#endif +// these low level routines provide a replacement for SREG interrupt save that AVR uses +// but are esp8266 specific. A normal use pattern is like +// +//{ +// uint32_t savedPS = xt_rsil(1); // this routine will allow level 2 and above +// // do work here +// xt_wsr_ps(savedPS); // restore the state +//} +// +// level (0-15), interrupts of the given level and above will be active +// level 15 will disable ALL interrupts, +// level 0 will enable ALL interrupts, +// +#define xt_rsil(level) (__extension__({uint32_t state; __asm__ __volatile__("rsil %0," __STRINGIFY(level) : "=a" (state) :: "memory"); state;})) +#define xt_wsr_ps(state) __asm__ __volatile__("wsr %0,ps; isync" :: "a" (state) : "memory") + +inline uint32_t esp_get_cycle_count() { + uint32_t ccount; + __asm__ __volatile__("rsr %0,ccount":"=a"(ccount)); + return ccount; +} #endif - diff --git a/cores/esp8266/umm_malloc/umm_malloc_cfg.h b/cores/esp8266/umm_malloc/umm_malloc_cfg.h index 27f80e2712..dbb865af75 100644 --- a/cores/esp8266/umm_malloc/umm_malloc_cfg.h +++ b/cores/esp8266/umm_malloc/umm_malloc_cfg.h @@ -10,6 +10,7 @@ extern "C" { #endif +#include #include #include @@ -133,19 +134,6 @@ extern char _heap_start[]; */ // #define UMM_CRITICAL_PERIOD_ANALYZE -#ifndef __STRINGIFY -#define __STRINGIFY(a) #a -#endif -/* - Copy paste xt_rsil and xt_wsr_ps from Arduino.h - */ -#ifndef xt_rsil -#define xt_rsil(level) (__extension__({uint32_t state; __asm__ __volatile__("rsil %0," __STRINGIFY(level) : "=a" (state) :: "memory"); state;})) -#endif -#ifndef xt_wsr_ps -#define xt_wsr_ps(state) __asm__ __volatile__("wsr %0,ps; isync" :: "a" (state) : "memory") -#endif - #if !defined(UMM_CRITICAL_PERIOD_ANALYZE) // This method preserves the intlevel on entry and restores the // original intlevel at exit. @@ -171,26 +159,17 @@ struct _UMM_TIME_STATS { bool get_umm_get_perf_data(struct _UMM_TIME_STATS *p, size_t size); -static inline ICACHE_RAM_ATTR uint32_t _umm_get_cycle_count() { - uint32_t ccount; - // Not sure esync is needed before "rsr %0,CCOUNT". I don't see it in - // Espressf SDK or Xtensa clock.S file. - // __asm__ __volatile__("esync; rsr %0,ccount":"=a"(ccount)::"memory"); - __asm__ __volatile__("rsr %0,ccount":"=a"(ccount)::"memory"); - return ccount; -} - static inline void _critical_entry(time_stat_t *p, uint32_t *saved_ps) { *saved_ps = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL); if (0U != (*saved_ps & 0x0FU)) { p->intlevel += 1U; } - p->start = _umm_get_cycle_count(); + p->start = esp_get_cycle_count(); } static inline void _critical_exit(time_stat_t *p, uint32_t *saved_ps) { - uint32_t elapse = _umm_get_cycle_count() - p->start; + uint32_t elapse = esp_get_cycle_count() - p->start; if (elapse < p->min) p->min = elapse; From 2d399118603168e39cc320a070508b29f8e590b0 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Tue, 16 Jul 2019 16:09:47 -0700 Subject: [PATCH 07/17] Added "#ifndef CORE_MOCK" around conflicted area. --- cores/esp8266/core_esp8266_features.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cores/esp8266/core_esp8266_features.h b/cores/esp8266/core_esp8266_features.h index f2531509b9..3b4289006a 100644 --- a/cores/esp8266/core_esp8266_features.h +++ b/cores/esp8266/core_esp8266_features.h @@ -50,6 +50,7 @@ // level 15 will disable ALL interrupts, // level 0 will enable ALL interrupts, // +#ifndef CORE_MOCK #define xt_rsil(level) (__extension__({uint32_t state; __asm__ __volatile__("rsil %0," __STRINGIFY(level) : "=a" (state) :: "memory"); state;})) #define xt_wsr_ps(state) __asm__ __volatile__("wsr %0,ps; isync" :: "a" (state) : "memory") @@ -58,5 +59,6 @@ inline uint32_t esp_get_cycle_count() { __asm__ __volatile__("rsr %0,ccount":"=a"(ccount)); return ccount; } +#endif // not CORE_MOCK #endif From 9d35f34439646d85d03b4dc3737aee406f40897e Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Mon, 22 Jul 2019 10:22:29 -0700 Subject: [PATCH 08/17] Moved performance measurment and ESP specific definitions to umm_performance.h/cpp. Removed testing asserts. --- cores/esp8266/umm_malloc/umm_malloc.cpp | 80 +--------------- cores/esp8266/umm_malloc/umm_malloc_cfg.h | 70 ++------------ cores/esp8266/umm_malloc/umm_performance.cpp | 78 ++++++++++++++++ cores/esp8266/umm_malloc/umm_performance.h | 98 ++++++++++++++++++++ 4 files changed, 185 insertions(+), 141 deletions(-) create mode 100644 cores/esp8266/umm_malloc/umm_performance.cpp create mode 100644 cores/esp8266/umm_malloc/umm_performance.h diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 27a30f2fc3..324bb56c0a 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -495,33 +495,13 @@ #include #include +#define _UMM_MALLOC_CPP #include "umm_malloc.h" #include "umm_malloc_cfg.h" /* user-dependent */ -#if defined(DEBUG_ESP_PORT) -#include -#endif extern "C" { -#ifdef UMM_CRITICAL_PERIOD_ANALYZE -static struct _UMM_TIME_STATS time_stats = { - {0xFFFFFFFF, 0U, 0U, 0U}, - {0xFFFFFFFF, 0U, 0U, 0U}, - {0xFFFFFFFF, 0U, 0U, 0U}, - {0xFFFFFFFF, 0U, 0U, 0U} }; - -bool ICACHE_FLASH_ATTR get_umm_get_perf_data(struct _UMM_TIME_STATS *p, size_t size) { - if (p && size != 0) { - uint32_t save_ps = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL); - memcpy(p, &time_stats, size); - xt_wsr_ps(save_ps); - return true; - } - return false; -} -#endif - // From UMM, the last caller of a malloc/realloc/calloc which failed: extern void *umm_last_fail_alloc_addr; extern int umm_last_fail_alloc_size; @@ -575,7 +555,7 @@ Changes for July 2019: test case that shows an issue yet; however, I and others think they are or have had crashes related to this. - I have added code that adjusts the running free heap number from _umm_malloc, + I have added code that updates a current free heap value from _umm_malloc, _umm_realloc, and _umm_free. Removing the need to do a long interrupts disabled calculation via umm_info. @@ -585,58 +565,6 @@ Changes for July 2019: */ -#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) -#define DEBUG_IRAM_ATTR ICACHE_RAM_ATTR -/* - Printing from the malloc routines is tricky. Since a lot of library calls - will want to do malloc. - - Objective: To be able to print "last gasp" diagnostic messages - when interrupts are disabled and w/o availability of heap resources. -*/ - -// ROM _putc1, ignores CRs and sends CR/LF for LF, newline. -// Always returns character sent. -int constexpr (*_rom_putc1)(int) = (int (*)(int))0x40001dcc; -void uart_buff_switch(uint8_t); - -int DEBUG_IRAM_ATTR _isr_safe_printf_P(const char *fmt, ...) __attribute__((format(printf, 1, 2))); -int DEBUG_IRAM_ATTR _isr_safe_printf_P(const char *fmt, ...) { -#ifdef DEBUG_ESP_PORT -#define VALUE(x) __STRINGIFY(x) - // Preprocessor and compiler together will optimize away the if. - if (strcmp("Serial1", VALUE(DEBUG_ESP_PORT)) == 0) { - uart_buff_switch(1U); - } else { - uart_buff_switch(0U); - } -#else - uart_buff_switch(0U); // Side effect, clears RX FIFO -#endif - /* - To use ets_strlen() and ets_memcpy() safely with PROGMEM, flash storage, - the PROGMEM address must be word (4 bytes) aligned. The destination - address for ets_memcpy must also be word-aligned. We also round the - buf_len up to the nearest word boundary. So that all transfers will be - whole words. - */ - size_t str_len = ets_strlen(fmt); - size_t buf_len = (str_len + 1 + 3) & ~0x03U; - char ram_buf[buf_len] __attribute__ ((aligned(4))); - ets_memcpy(ram_buf, fmt, buf_len); - va_list argPtr; - va_start(argPtr, fmt); - int result = ets_vprintf(_rom_putc1, ram_buf, argPtr); - va_end(argPtr); - return result; -} - -#define printf(fmt, ...) _isr_safe_printf_P(PSTR(fmt), ##__VA_ARGS__) -#else -// Macro to place constant strings into PROGMEM and print them properly -#define printf(fmt, ...) printf(PSTR(fmt), ## __VA_ARGS__ ) -#endif - /* -- dbglog {{{ */ /* ---------------------------------------------------------------------------- @@ -1205,10 +1133,6 @@ void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { ummHeapInfo.maxFreeContiguousBlocks = curBlocks; } } -#if defined(DEBUG_ESP_PORT) - // This is a good place to verify we are calculating correctly - assert(ummHeapFreeBlocks == ummHeapInfo.freeBlocks); -#endif DBG_LOG_FORCE( force, "|0x%08lx|B %5d|NB %5d|PB %5d|Z %5d|NF %5d|PF %5d|\n", (unsigned long)(&UMM_BLOCK(blockNo)), diff --git a/cores/esp8266/umm_malloc/umm_malloc_cfg.h b/cores/esp8266/umm_malloc/umm_malloc_cfg.h index dbb865af75..155efdef60 100644 --- a/cores/esp8266/umm_malloc/umm_malloc_cfg.h +++ b/cores/esp8266/umm_malloc/umm_malloc_cfg.h @@ -15,6 +15,7 @@ extern "C" { #include #include "c_types.h" +#include "umm_performance.h" /* * There are a number of defines you can set at compile time that affect how * the memory allocator will operate. @@ -111,78 +112,21 @@ extern char _heap_start[]; * called from within umm_malloc() */ -/* - Per Devyte, the core currently doesn't support masking a specific interrupt - level. That doesn't mean it can't be implemented, only that at this time - locking is implemented as all or nothing. - https://github.com/esp8266/Arduino/issues/6246#issuecomment-508612609 - So for now we default to all, 15. - */ -#ifndef DEFAULT_CRITICAL_SECTION_INTLEVEL -#define DEFAULT_CRITICAL_SECTION_INTLEVEL 15 -#endif +#if defined(UMM_CRITICAL_PERIOD_ANALYZE) -/* - * -D UMM_CRITICAL_PERIOD_ANALYZE : - * - * Build option to collect timing usage data on critical section usage in - * functions: info, malloc, realloc. Collects MIN, MAX, and number of time - * IRQs were disabled at request time. Note, for realloc MAX disabled time - * will not include the time from calling malloc and/or free. - * Examine code for specifics on what info is available and how to access. -*/ -// #define UMM_CRITICAL_PERIOD_ANALYZE +#define UMM_CRITICAL_DECL(tag) uint32_t _saved_ps_##tag +#define UMM_CRITICAL_ENTRY(tag) _critical_entry(&time_stats.tag, &_saved_ps_##tag) +#define UMM_CRITICAL_EXIT(tag) _critical_exit(&time_stats.tag, &_saved_ps_##tag) + +#else -#if !defined(UMM_CRITICAL_PERIOD_ANALYZE) // This method preserves the intlevel on entry and restores the // original intlevel at exit. #define UMM_CRITICAL_DECL(tag) uint32_t _saved_ps_##tag #define UMM_CRITICAL_ENTRY(tag) _saved_ps_##tag = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL) #define UMM_CRITICAL_EXIT(tag) xt_wsr_ps(_saved_ps_##tag) -#else -// This option adds support for gathering time locked data -typedef struct _TIME_STAT { - uint32_t min; - uint32_t max; - uint32_t start; - uint32_t intlevel; -} time_stat_t; - -struct _UMM_TIME_STATS { - time_stat_t id_malloc; - time_stat_t id_realloc; - time_stat_t id_free; - time_stat_t id_info; -}; - -bool get_umm_get_perf_data(struct _UMM_TIME_STATS *p, size_t size); - -static inline void _critical_entry(time_stat_t *p, uint32_t *saved_ps) { - *saved_ps = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL); - if (0U != (*saved_ps & 0x0FU)) { - p->intlevel += 1U; - } - - p->start = esp_get_cycle_count(); -} - -static inline void _critical_exit(time_stat_t *p, uint32_t *saved_ps) { - uint32_t elapse = esp_get_cycle_count() - p->start; - if (elapse < p->min) - p->min = elapse; - - if (elapse > p->max) - p->max = elapse; - - xt_wsr_ps(*saved_ps); -} - -#define UMM_CRITICAL_DECL(tag) uint32_t _saved_ps_##tag -#define UMM_CRITICAL_ENTRY(tag) _critical_entry(&time_stats.tag, &_saved_ps_##tag) -#define UMM_CRITICAL_EXIT(tag) _critical_exit(&time_stats.tag, &_saved_ps_##tag) - #endif /* diff --git a/cores/esp8266/umm_malloc/umm_performance.cpp b/cores/esp8266/umm_malloc/umm_performance.cpp new file mode 100644 index 0000000000..1999ad0bdb --- /dev/null +++ b/cores/esp8266/umm_malloc/umm_performance.cpp @@ -0,0 +1,78 @@ +/* + * umm_malloc performance measurments and ESP specifics + */ + +#include +#include +#include +#include +#include "umm_performance.h" + +extern "C" { + +#ifdef UMM_CRITICAL_PERIOD_ANALYZE +struct _UMM_TIME_STATS time_stats = { + {0xFFFFFFFF, 0U, 0U, 0U}, + {0xFFFFFFFF, 0U, 0U, 0U}, + {0xFFFFFFFF, 0U, 0U, 0U}, + {0xFFFFFFFF, 0U, 0U, 0U} }; + +bool ICACHE_FLASH_ATTR get_umm_get_perf_data(struct _UMM_TIME_STATS *p, size_t size) { + if (p && sizeof(time_stats) == size) { + uint32_t save_ps = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL); + memcpy(p, &time_stats, size); + xt_wsr_ps(save_ps); + return true; + } + return false; +} +#endif + +#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) +/* + Printing from the malloc routines is tricky. Since a lot of library calls + will want to do malloc. + + Objective: To be able to print "last gasp" diagnostic messages + when interrupts are disabled and w/o availability of heap resources. +*/ + +// ROM _putc1, ignores CRs and sends CR/LF for LF, newline. +// Always returns character sent. +int constexpr (*_rom_putc1)(int) = (int (*)(int))0x40001dcc; +void uart_buff_switch(uint8_t); + +int _isr_safe_printf_P(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +int ICACHE_RAM_ATTR _isr_safe_printf_P(const char *fmt, ...) { +#ifdef DEBUG_ESP_PORT +#define VALUE(x) __STRINGIFY(x) + // Preprocessor and compiler together will optimize away the if. + if (strcmp("Serial1", VALUE(DEBUG_ESP_PORT)) == 0) { + uart_buff_switch(1U); + } else { + uart_buff_switch(0U); + } +#else + uart_buff_switch(0U); // Side effect, clears RX FIFO +#endif + /* + To use ets_strlen() and ets_memcpy() safely with PROGMEM, flash storage, + the PROGMEM address must be word (4 bytes) aligned. The destination + address for ets_memcpy must also be word-aligned. We also round the + buf_len up to the nearest word boundary. So that all transfers will be + whole words. + */ + size_t str_len = ets_strlen(fmt); + size_t buf_len = (str_len + 1 + 3) & ~0x03U; + char ram_buf[buf_len] __attribute__ ((aligned(4))); + ets_memcpy(ram_buf, fmt, buf_len); + va_list argPtr; + va_start(argPtr, fmt); + int result = ets_vprintf(_rom_putc1, ram_buf, argPtr); + va_end(argPtr); + return result; +} + +#endif + +}; diff --git a/cores/esp8266/umm_malloc/umm_performance.h b/cores/esp8266/umm_malloc/umm_performance.h new file mode 100644 index 0000000000..c2556f5166 --- /dev/null +++ b/cores/esp8266/umm_malloc/umm_performance.h @@ -0,0 +1,98 @@ +/* + * umm_malloc performance measurments and ESP specifics + */ + +#ifndef _UMM_PERFORMANCE_H +#define _UMM_PERFORMANCE_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * -D UMM_CRITICAL_PERIOD_ANALYZE : + * + * Build option to collect timing usage data on critical section usage in + * functions: info, malloc, realloc. Collects MIN, MAX, and number of time + * IRQs were disabled at request time. Note, for realloc MAX disabled time + * will not include the time from calling malloc and/or free. + * Examine code for specifics on what info is available and how to access. +*/ +#define UMM_CRITICAL_PERIOD_ANALYZE + + +/* + Per Devyte, the core currently doesn't support masking a specific interrupt + level. That doesn't mean it can't be implemented, only that at this time + locking is implemented as all or nothing. + https://github.com/esp8266/Arduino/issues/6246#issuecomment-508612609 + + So for now we default to all, 15. + */ +#ifndef DEFAULT_CRITICAL_SECTION_INTLEVEL +#define DEFAULT_CRITICAL_SECTION_INTLEVEL 15 +#endif + +#if defined(UMM_CRITICAL_PERIOD_ANALYZE) +// This option adds support for gathering time locked data +typedef struct _TIME_STAT { + uint32_t min; + uint32_t max; + uint32_t start; + uint32_t intlevel; +} time_stat_t; + +struct _UMM_TIME_STATS { + time_stat_t id_malloc; + time_stat_t id_realloc; + time_stat_t id_free; + time_stat_t id_info; +}; + +extern struct _UMM_TIME_STATS time_stats; + +bool get_umm_get_perf_data(struct _UMM_TIME_STATS *p, size_t size); + +static inline void _critical_entry(time_stat_t *p, uint32_t *saved_ps) { + *saved_ps = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL); + if (0U != (*saved_ps & 0x0FU)) { + p->intlevel += 1U; + } + + p->start = esp_get_cycle_count(); +} + +static inline void _critical_exit(time_stat_t *p, uint32_t *saved_ps) { + uint32_t elapse = esp_get_cycle_count() - p->start; + if (elapse < p->min) + p->min = elapse; + + if (elapse > p->max) + p->max = elapse; + + xt_wsr_ps(*saved_ps); +} +#endif + +#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) +int _isr_safe_printf_P(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +#endif + +// Use _UMM_MALLOC_CPP to limit access to umm_malloc.cpp module specific +// definitions that are hardware and platform specific. +#ifdef _UMM_MALLOC_CPP + +#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) +#define printf(fmt, ...) _isr_safe_printf_P(PSTR(fmt), ##__VA_ARGS__) +#else +// Macro to place constant strings into PROGMEM and print them properly +#define printf(fmt, ...) printf(PSTR(fmt), ## __VA_ARGS__ ) +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _UMM_PERFORMANCE_H */ From e69676c9d458b05b1458ee3f350a17da2338397a Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Tue, 23 Jul 2019 15:12:59 -0700 Subject: [PATCH 09/17] Commented out umm analyze. Delay CRITICAL_SECTION_EXIT() in umm_realloc() to avoid exposing a transient OOM condition to ISR. --- cores/esp8266/umm_malloc/umm_malloc.cpp | 2 +- cores/esp8266/umm_malloc/umm_performance.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 324bb56c0a..4dd4217a7b 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -691,7 +691,7 @@ unsigned short int umm_numblocks = 0; /* * This does not look safe, no access locks. It currently is not being - * built, so not an immediate issue. - mjh 061019 + * built, so not an immediate issue. - 06/10/19 */ /* integrity check (UMM_INTEGRITY_CHECK) {{{ */ #if defined(UMM_INTEGRITY_CHECK) diff --git a/cores/esp8266/umm_malloc/umm_performance.h b/cores/esp8266/umm_malloc/umm_performance.h index c2556f5166..c8897eef11 100644 --- a/cores/esp8266/umm_malloc/umm_performance.h +++ b/cores/esp8266/umm_malloc/umm_performance.h @@ -18,7 +18,7 @@ extern "C" { * will not include the time from calling malloc and/or free. * Examine code for specifics on what info is available and how to access. */ -#define UMM_CRITICAL_PERIOD_ANALYZE +// #define UMM_CRITICAL_PERIOD_ANALYZE /* From ed49b3c0417efe0c9fbcb3e8792e62e29f13956d Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Fri, 26 Jul 2019 22:41:18 -0700 Subject: [PATCH 10/17] Missed file change. This commit has: Delay CRITICAL_SECTION_EXIT() in umm_realloc() to avoid exposing a transient OOM condition to ISR. --- cores/esp8266/umm_malloc/umm_malloc.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 4dd4217a7b..53103bd170 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -1730,12 +1730,8 @@ static void *_umm_realloc( void *ptr, size_t size ) { umm_make_new_block( c, blocks, 0, 0 ); - UMM_CRITICAL_EXIT(id_realloc); - _umm_free( (void *)&UMM_DATA(c+blocks) ); - return( ptr ); - } else { /* New block is bigger than the old block... */ From a937f1ef2b1aa6b9a7d9ceabf5a8174ce9ccdc37 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Sat, 27 Jul 2019 09:22:42 -0700 Subject: [PATCH 11/17] 2nd Path. Removed early release of critical section around memmove to avoid a possible OOM for an ISR. --- cores/esp8266/umm_malloc/umm_malloc.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 53103bd170..fe4df269bf 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -1685,13 +1685,6 @@ static void *_umm_realloc( void *ptr, size_t size ) { c = umm_assimilate_down(c, 0); - /* - * For the ESP8266 interrupts should not be off for more than 10us. - * An unprotect/protect around memmove should be safe to do here. - * All variables used are on the stack. - */ - UMM_CRITICAL_EXIT(id_realloc); - /* * Move the bytes down to the new block we just created, but be sure to move * only the original bytes. @@ -1702,8 +1695,6 @@ static void *_umm_realloc( void *ptr, size_t size ) { /* And don't forget to adjust the pointer to the new block location! */ ptr = (void *)&UMM_DATA(c); - - UMM_CRITICAL_ENTRY(id_realloc); } unsigned short int previousBlockSize = blockSize; @@ -1729,9 +1720,7 @@ static void *_umm_realloc( void *ptr, size_t size ) { DBG_LOG_DEBUG( "realloc %d to a smaller block %d, shrink and free the leftover bits\n", blockSize, blocks ); umm_make_new_block( c, blocks, 0, 0 ); - _umm_free( (void *)&UMM_DATA(c+blocks) ); - } else { /* New block is bigger than the old block... */ From 909c6b93c8be61d64137a905767b598ab7d968ab Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Sat, 27 Jul 2019 09:37:06 -0700 Subject: [PATCH 12/17] improved variable name --- cores/esp8266/umm_malloc/umm_malloc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index fe4df269bf..986be89721 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -1697,14 +1697,14 @@ static void *_umm_realloc( void *ptr, size_t size ) { ptr = (void *)&UMM_DATA(c); } - unsigned short int previousBlockSize = blockSize; + unsigned short int startingBlockSize = blockSize; /* Now calculate the block size again...and we'll have three cases */ blockSize = (UMM_NBLOCK(c) - c); /* Update dynamic Free Block count */ - ummHeapFreeBlocks -= (blockSize - previousBlockSize); + ummHeapFreeBlocks -= (blockSize - startingBlockSize); if( blockSize == blocks ) { /* This space intentionally left blank - return the original pointer! */ From 996ac7601a67dc967a94cd8894bc4689c51bd0f9 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Thu, 1 Aug 2019 14:39:32 -0700 Subject: [PATCH 13/17] Resolved ISR OOM concern with `_umm_realloc()` Updated realloc() to do a preliminary free() of unused space, before performing a critical section exit and memmove. This change was applied to the current _umm_realloc(). This change should reduce the risk of an ISR getting an OOM, during a realloc memmove operation. Added additional stats for verifying correct operation. --- cores/esp8266/umm_malloc/umm_malloc.cpp | 112 ++++++++++++++++--- cores/esp8266/umm_malloc/umm_malloc_cfg.h | 2 + cores/esp8266/umm_malloc/umm_performance.cpp | 3 + cores/esp8266/umm_malloc/umm_stats.h | 36 ++++++ 4 files changed, 135 insertions(+), 18 deletions(-) create mode 100644 cores/esp8266/umm_malloc/umm_stats.h diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 986be89721..74e8e0b6fe 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -1027,7 +1027,6 @@ static void *get_unpoisoned( void *vptr ) { */ UMM_HEAP_INFO ummHeapInfo; -unsigned short int ummHeapFreeBlocks; void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { UMM_CRITICAL_DECL(id_info); @@ -1153,6 +1152,21 @@ void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { ummHeapInfo.usedBlocks, ummHeapInfo.freeBlocks ); + if (ummHeapInfo.freeBlocks == ummStats.free_blocks) { + DBG_LOG_FORCE( force, "\nheap info Free blocks and heap statistics Free blocks match.\n"); + } else { + DBG_LOG_FORCE( force, "\nheap info Free blocks %5d != heap statistics Free Blocks %5d\n\n", + ummHeapInfo.freeBlocks, + ummStats.free_blocks ); + } + + DBG_LOG_FORCE( force, "\numm heap statistics:\n"); + DBG_LOG_FORCE( force, " Free Space %5u\n", ummStats.free_blocks * sizeof(umm_block)); + DBG_LOG_FORCE( force, " Low Watermark %5u\n", ummStats.free_blocks_min * sizeof(umm_block)); + DBG_LOG_FORCE( force, " MAX Alloc Request %5u\n", ummStats.alloc_max_size); + DBG_LOG_FORCE( force, " OOM Count %5u\n", ummStats.oom_count); + DBG_LOG_FORCE( force, " Size of umm_block %5u\n", sizeof(umm_block)); + /* Release the critical section... */ UMM_CRITICAL_EXIT(id_info); @@ -1272,8 +1286,8 @@ void ICACHE_FLASH_ATTR umm_init( void ) { /* index of the latest `umm_block` */ const unsigned short int block_last = UMM_NUMBLOCKS - 1; - /* init heapFreeBlocks */ - ummHeapFreeBlocks = block_last; + /* init ummStats */ + ummStats.free_blocks = ummStats.free_blocks_min = block_last; /* setup the 0th `umm_block`, which just points to the 1st */ UMM_NBLOCK(block_0th) = block_1th; @@ -1348,8 +1362,8 @@ static void _umm_free( void *ptr ) { /* Protect the critical section... */ UMM_CRITICAL_ENTRY(id_free); - /* Update dynamic Free Block count */ - ummHeapFreeBlocks += (UMM_NBLOCK(c) - c); + /* Update heap statistics */ + ummStats.free_blocks += (UMM_NBLOCK(c) - c); /* Now let's assimilate this block with the next one if possible. */ @@ -1435,6 +1449,9 @@ static void *_umm_malloc( size_t size ) { return( (void *)NULL ); } + if ( size > ummStats.alloc_max_size ) + ummStats.alloc_max_size = size; + blocks = umm_blocks( size ); /* @@ -1521,10 +1538,14 @@ static void *_umm_malloc( size_t size ) { UMM_NFREE( cf + blocks ) = UMM_NFREE(cf); } - /* Update dynamic Free Block count */ - ummHeapFreeBlocks -= blocks; + /* Update heap statistics */ + ummStats.free_blocks -= blocks; + if (ummStats.free_blocks < ummStats.free_blocks_min) + ummStats.free_blocks_min = ummStats.free_blocks; } else { + ummStats.oom_count += 1; + /* Release the critical section... */ UMM_CRITICAL_EXIT(id_malloc); @@ -1548,7 +1569,6 @@ static void *_umm_realloc( void *ptr, size_t size ) { unsigned short int blocks; unsigned short int blockSize; - unsigned short int c; size_t curSize; @@ -1585,6 +1605,9 @@ static void *_umm_realloc( void *ptr, size_t size ) { return( (void *)NULL ); } + if ( size > ummStats.alloc_max_size ) + ummStats.alloc_max_size = size; + /* * Defer starting critical section. * @@ -1658,7 +1681,13 @@ static void *_umm_realloc( void *ptr, size_t size ) { * assimilation step later in free :-) */ - umm_assimilate_up( c ); + if( UMM_NBLOCK(UMM_NBLOCK(c)) & UMM_FREELIST_MASK ) { + // This will often be most of the free heap. The excess is + // restored when umm_free() is called before returning. + ummStats.free_blocks -= + (UMM_NBLOCK(UMM_NBLOCK(c)) & UMM_BLOCKNO_MASK) - UMM_NBLOCK(c); + umm_assimilate_up( c ); + } /* * Now check if it might help to assimilate down, but don't actually @@ -1674,6 +1703,17 @@ static void *_umm_realloc( void *ptr, size_t size ) { DBG_LOG_DEBUG( "realloc() could assimilate down %d blocks - fits!\n\r", c-UMM_PBLOCK(c) ); + /* + * Calculate the number of blocks to keep while the information is + * still available. + */ + + unsigned short int prevBlockSize = c - UMM_PBLOCK(c); + ummStats.free_blocks -= prevBlockSize; + unsigned short int prelimBlockSize = blockSize + prevBlockSize; + if(prelimBlockSize < blocks) + prelimBlockSize = blocks; + /* Disconnect the previous block from the FREE list */ umm_disconnect_from_free_list( UMM_PBLOCK(c) ); @@ -1685,6 +1725,29 @@ static void *_umm_realloc( void *ptr, size_t size ) { c = umm_assimilate_down(c, 0); + /* + * Currently all or most of the heap has been grabbed. Do an early split of + * allocation down to the amount needed to do a successful move operation. + * This will allow an alloc/new from a ISR to succeed while a memmove is + * running. + */ + if( (UMM_NBLOCK(c) - c) > prelimBlockSize ) { + umm_make_new_block( c, prelimBlockSize, 0, 0 ); + _umm_free( (void *)&UMM_DATA(c+prelimBlockSize) ); + } + + // This is the lowest low that may be seen by an ISR doing an alloc/new + if( ummStats.free_blocks < ummStats.free_blocks_min ) + ummStats.free_blocks_min = ummStats.free_blocks; + + /* + * For the ESP8266 interrupts should not be off for more than 10us. + * An unprotect/protect around memmove should be safe to do here. + * All variables used are on the stack. + */ + + UMM_CRITICAL_EXIT(id_realloc); + /* * Move the bytes down to the new block we just created, but be sure to move * only the original bytes. @@ -1695,17 +1758,15 @@ static void *_umm_realloc( void *ptr, size_t size ) { /* And don't forget to adjust the pointer to the new block location! */ ptr = (void *)&UMM_DATA(c); - } - unsigned short int startingBlockSize = blockSize; + /* Now resume critical section... */ + UMM_CRITICAL_ENTRY(id_realloc); + } - /* Now calculate the block size again...and we'll have three cases */ + // Now calculate the block size again...and we'll have three cases / blockSize = (UMM_NBLOCK(c) - c); - /* Update dynamic Free Block count */ - ummHeapFreeBlocks -= (blockSize - startingBlockSize); - if( blockSize == blocks ) { /* This space intentionally left blank - return the original pointer! */ @@ -1724,6 +1785,9 @@ static void *_umm_realloc( void *ptr, size_t size ) { } else { /* New block is bigger than the old block... */ + /* Finish up without critical section */ + UMM_CRITICAL_EXIT(id_realloc); + void *oldptr = ptr; DBG_LOG_DEBUG( "realloc %d to a bigger block %d, make new, copy, and free the old\n", blockSize, blocks ); @@ -1733,16 +1797,19 @@ static void *_umm_realloc( void *ptr, size_t size ) { * free up the old block, but only if the malloc was sucessful! */ - UMM_CRITICAL_EXIT(id_realloc); - if( (ptr = _umm_malloc( size )) ) { memcpy( ptr, oldptr, curSize ); _umm_free( oldptr ); + } else { + ummStats.oom_count += 1; // Needs atomic } return( ptr ); } + if (ummStats.free_blocks < ummStats.free_blocks_min) + ummStats.free_blocks_min = ummStats.free_blocks; + /* Release the critical section... */ UMM_CRITICAL_EXIT(id_realloc); @@ -1859,7 +1926,16 @@ void umm_free( void *ptr ) { /* ------------------------------------------------------------------------ */ size_t ICACHE_FLASH_ATTR umm_free_heap_size( void ) { - return (size_t)ummHeapFreeBlocks * sizeof(umm_block); + return (size_t)ummStats.free_blocks * sizeof(umm_block); +} + +size_t ICACHE_FLASH_ATTR umm_free_heap_size_min( void ) { + return (size_t)ummStats.free_blocks_min * sizeof(umm_block); +} + +size_t ICACHE_FLASH_ATTR umm_free_heap_size_min_reset( void ) { + ummStats.free_blocks_min = ummStats.free_blocks; + return (size_t)ummStats.free_blocks_min * sizeof(umm_block); } size_t ICACHE_FLASH_ATTR umm_max_block_size( void ) { diff --git a/cores/esp8266/umm_malloc/umm_malloc_cfg.h b/cores/esp8266/umm_malloc/umm_malloc_cfg.h index 155efdef60..ff39648119 100644 --- a/cores/esp8266/umm_malloc/umm_malloc_cfg.h +++ b/cores/esp8266/umm_malloc/umm_malloc_cfg.h @@ -16,6 +16,8 @@ extern "C" { #include "c_types.h" #include "umm_performance.h" +#include "umm_stats.h" + /* * There are a number of defines you can set at compile time that affect how * the memory allocator will operate. diff --git a/cores/esp8266/umm_malloc/umm_performance.cpp b/cores/esp8266/umm_malloc/umm_performance.cpp index 1999ad0bdb..4b9ec4e6b3 100644 --- a/cores/esp8266/umm_malloc/umm_performance.cpp +++ b/cores/esp8266/umm_malloc/umm_performance.cpp @@ -7,9 +7,12 @@ #include #include #include "umm_performance.h" +#include "umm_stats.h" extern "C" { +UMM_STATS ummStats = {0, 0, 0, 0}; + #ifdef UMM_CRITICAL_PERIOD_ANALYZE struct _UMM_TIME_STATS time_stats = { {0xFFFFFFFF, 0U, 0U, 0U}, diff --git a/cores/esp8266/umm_malloc/umm_stats.h b/cores/esp8266/umm_malloc/umm_stats.h new file mode 100644 index 0000000000..eba7e3cae1 --- /dev/null +++ b/cores/esp8266/umm_malloc/umm_stats.h @@ -0,0 +1,36 @@ +/* + * umm_malloc heap statistics + */ + +#ifndef _UMM_STATS_H +#define _UMM_STATS_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct UMM_STATS_t { + unsigned short int free_blocks; + unsigned short int free_blocks_min; + size_t alloc_max_size; + size_t oom_count; +} UMM_STATS; +extern UMM_STATS ummStats; + +size_t ICACHE_FLASH_ATTR umm_free_heap_size_min( void ); +size_t ICACHE_FLASH_ATTR umm_free_heap_size_min_reset( void ); + +size_t inline umm_get_max_alloc_size( void ) { + return ummStats.alloc_max_size; +} + +size_t inline umm_get_oom_count( void ) { + return ummStats.oom_count; +} + + +#ifdef __cplusplus +} +#endif + +#endif /* _UMM_STATS_H */ From e86e7cacb0149199173cb590b486acb9730337e6 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Thu, 1 Aug 2019 15:44:15 -0700 Subject: [PATCH 14/17] Resolved ISR OOM concern in _umm_realloc() Updated realloc() to do a preliminary free() of unused space, before performing a critical section exit and memmove. This change was applied to the current _umm_realloc(). This change should reduce the risk of an ISR getting an OOM when interrupting an active realloc memmove operation. Added additional stats for verifying correct operation. Updated: for clarity and Travis-CI fail. --- cores/esp8266/umm_malloc/umm_stats.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_stats.h b/cores/esp8266/umm_malloc/umm_stats.h index eba7e3cae1..f2aae9875f 100644 --- a/cores/esp8266/umm_malloc/umm_stats.h +++ b/cores/esp8266/umm_malloc/umm_stats.h @@ -20,11 +20,11 @@ extern UMM_STATS ummStats; size_t ICACHE_FLASH_ATTR umm_free_heap_size_min( void ); size_t ICACHE_FLASH_ATTR umm_free_heap_size_min_reset( void ); -size_t inline umm_get_max_alloc_size( void ) { +inline size_t umm_get_max_alloc_size( void ) { return ummStats.alloc_max_size; } -size_t inline umm_get_oom_count( void ) { +inline size_t umm_get_oom_count( void ) { return ummStats.oom_count; } From b81e87f4dbfdbc3a49f27e85f536d4d65425e5d9 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Fri, 2 Aug 2019 17:16:17 -0700 Subject: [PATCH 15/17] Update to keep access to alternate printf in one file. --- cores/esp8266/umm_malloc/umm_malloc.cpp | 8 +++++++- cores/esp8266/umm_malloc/umm_performance.h | 13 ------------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 74e8e0b6fe..9a731a2ce6 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -495,13 +495,19 @@ #include #include -#define _UMM_MALLOC_CPP #include "umm_malloc.h" #include "umm_malloc_cfg.h" /* user-dependent */ extern "C" { +#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) +#define printf(fmt, ...) _isr_safe_printf_P(PSTR(fmt), ##__VA_ARGS__) +#else +// Macro to place constant strings into PROGMEM and print them properly +#define printf(fmt, ...) printf(PSTR(fmt), ## __VA_ARGS__ ) +#endif + // From UMM, the last caller of a malloc/realloc/calloc which failed: extern void *umm_last_fail_alloc_addr; extern int umm_last_fail_alloc_size; diff --git a/cores/esp8266/umm_malloc/umm_performance.h b/cores/esp8266/umm_malloc/umm_performance.h index c8897eef11..5d6ae24c61 100644 --- a/cores/esp8266/umm_malloc/umm_performance.h +++ b/cores/esp8266/umm_malloc/umm_performance.h @@ -78,19 +78,6 @@ static inline void _critical_exit(time_stat_t *p, uint32_t *saved_ps) { int _isr_safe_printf_P(const char *fmt, ...) __attribute__((format(printf, 1, 2))); #endif -// Use _UMM_MALLOC_CPP to limit access to umm_malloc.cpp module specific -// definitions that are hardware and platform specific. -#ifdef _UMM_MALLOC_CPP - -#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) -#define printf(fmt, ...) _isr_safe_printf_P(PSTR(fmt), ##__VA_ARGS__) -#else -// Macro to place constant strings into PROGMEM and print them properly -#define printf(fmt, ...) printf(PSTR(fmt), ## __VA_ARGS__ ) -#endif - -#endif - #ifdef __cplusplus } #endif From 1bf9eb0aff1049d3575f19e648892c129dcb37ab Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Wed, 7 Aug 2019 11:01:32 -0700 Subject: [PATCH 16/17] Updated to use ISR safe versions of memmove, memcpy, and memset. The library versions of memmove, memcpy, and memset were in flash. Updated to use ROM functions ets_memmove, ets_memcpy, and ets_memset. Additional note, the library version of memmove does not appear to have been optimized. It took almost 10x longer than the ROM version. Renamed printf macro to DBGLOG_FUNCTION and moved to umm_malloc_cfg.h. Changed printf macro usage to use DBGLOG_FUNCTION. --- cores/esp8266/umm_malloc/umm_malloc.cpp | 48 +++++++++++------------ cores/esp8266/umm_malloc/umm_malloc_cfg.h | 11 ++++++ tools/sdk/include/ets_sys.h | 1 + 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 9a731a2ce6..ac736e840d 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -501,12 +501,12 @@ extern "C" { -#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) -#define printf(fmt, ...) _isr_safe_printf_P(PSTR(fmt), ##__VA_ARGS__) -#else -// Macro to place constant strings into PROGMEM and print them properly -#define printf(fmt, ...) printf(PSTR(fmt), ## __VA_ARGS__ ) -#endif +#undef memcpy +#undef memmove +#undef memset +#define memcpy ets_memcpy +#define memmove ets_memmove +#define memset ets_memset // From UMM, the last caller of a malloc/realloc/calloc which failed: extern void *umm_last_fail_alloc_addr; @@ -611,42 +611,42 @@ Changes for July 2019: /* ------------------------------------------------------------------------- */ #if DBG_LOG_LEVEL >= 6 -# define DBG_LOG_TRACE( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_TRACE( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_TRACE( format, ... ) #endif #if DBG_LOG_LEVEL >= 5 -# define DBG_LOG_DEBUG( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_DEBUG( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_DEBUG( format, ... ) #endif #if DBG_LOG_LEVEL >= 4 -# define DBG_LOG_CRITICAL( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_CRITICAL( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_CRITICAL( format, ... ) #endif #if DBG_LOG_LEVEL >= 3 -# define DBG_LOG_ERROR( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_ERROR( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_ERROR( format, ... ) #endif #if DBG_LOG_LEVEL >= 2 -# define DBG_LOG_WARNING( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_WARNING( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_WARNING( format, ... ) #endif #if DBG_LOG_LEVEL >= 1 -# define DBG_LOG_INFO( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_INFO( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_INFO( format, ... ) #endif -#define DBG_LOG_FORCE( force, format, ... ) {if(force) {printf( format, ## __VA_ARGS__ );}} +#define DBG_LOG_FORCE( force, format, ... ) {if(force) {DBGLOG_FUNCTION( format, ## __VA_ARGS__ );}} /* }}} */ @@ -739,7 +739,7 @@ static int integrity_check(void) { /* Check that next free block number is valid */ if (cur >= UMM_NUMBLOCKS) { - printf("heap integrity broken: too large next free num: %d " + DBGLOG_FUNCTION("heap integrity broken: too large next free num: %d " "(in block %d, addr 0x%lx)\n", cur, prev, (unsigned long)&UMM_NBLOCK(prev)); ok = 0; @@ -752,7 +752,7 @@ static int integrity_check(void) { /* Check if prev free block number matches */ if (UMM_PFREE(cur) != prev) { - printf("heap integrity broken: free links don't match: " + DBGLOG_FUNCTION("heap integrity broken: free links don't match: " "%d -> %d, but %d -> %d\n", prev, cur, cur, UMM_PFREE(cur)); ok = 0; @@ -771,7 +771,7 @@ static int integrity_check(void) { /* Check that next block number is valid */ if (cur >= UMM_NUMBLOCKS) { - printf("heap integrity broken: too large next block num: %d " + DBGLOG_FUNCTION("heap integrity broken: too large next block num: %d " "(in block %d, addr 0x%lx)\n", cur, prev, (unsigned long)&UMM_NBLOCK(prev)); ok = 0; @@ -786,7 +786,7 @@ static int integrity_check(void) { if ((UMM_NBLOCK(cur) & UMM_FREELIST_MASK) != (UMM_PBLOCK(cur) & UMM_FREELIST_MASK)) { - printf("heap integrity broken: mask wrong at addr 0x%lx: n=0x%x, p=0x%x\n", + DBGLOG_FUNCTION("heap integrity broken: mask wrong at addr 0x%lx: n=0x%x, p=0x%x\n", (unsigned long)&UMM_NBLOCK(cur), (UMM_NBLOCK(cur) & UMM_FREELIST_MASK), (UMM_PBLOCK(cur) & UMM_FREELIST_MASK) @@ -800,7 +800,7 @@ static int integrity_check(void) { /* Check if prev block number matches */ if (UMM_PBLOCK(cur) != prev) { - printf("heap integrity broken: block links don't match: " + DBGLOG_FUNCTION("heap integrity broken: block links don't match: " "%d -> %d, but %d -> %d\n", prev, cur, cur, UMM_PBLOCK(cur)); ok = 0; @@ -846,7 +846,7 @@ static int integrity_check(void) { */ static void dump_mem ( const unsigned char *ptr, size_t len ) { while (len--) { - printf(" 0x%.2x", (unsigned int)(*ptr++)); + DBGLOG_FUNCTION(" 0x%.2x", (unsigned int)(*ptr++)); } } @@ -877,11 +877,11 @@ static int check_poison( const unsigned char *ptr, size_t poison_size, } if (!ok) { - printf("there is no poison %s the block. " + DBGLOG_FUNCTION("there is no poison %s the block. " "Expected poison address: 0x%lx, actual data:", where, (unsigned long)ptr); dump_mem(ptr, poison_size); - printf("\n"); + DBGLOG_FUNCTION("\n"); } return ok; @@ -895,7 +895,7 @@ static int check_poison_block( umm_block *pblock ) { int ok = 1; if (pblock->header.used.next & UMM_FREELIST_MASK) { - printf("check_poison_block is called for free block 0x%lx\n", + DBGLOG_FUNCTION("check_poison_block is called for free block 0x%lx\n", (unsigned long)pblock); } else { /* the block is used; let's check poison */ @@ -904,7 +904,7 @@ static int check_poison_block( umm_block *pblock ) { pc_cur = pc + sizeof(UMM_POISONED_BLOCK_LEN_TYPE); if (!check_poison(pc_cur, UMM_POISON_SIZE_BEFORE, "before")) { - printf("block start: %08x\n", pc + sizeof(UMM_POISONED_BLOCK_LEN_TYPE) + UMM_POISON_SIZE_BEFORE); + DBGLOG_FUNCTION("block start: %08x\n", pc + sizeof(UMM_POISONED_BLOCK_LEN_TYPE) + UMM_POISON_SIZE_BEFORE); UMM_HEAP_CORRUPTION_CB(); ok = 0; goto clean; @@ -912,7 +912,7 @@ static int check_poison_block( umm_block *pblock ) { pc_cur = pc + *((UMM_POISONED_BLOCK_LEN_TYPE *)pc) - UMM_POISON_SIZE_AFTER; if (!check_poison(pc_cur, UMM_POISON_SIZE_AFTER, "after")) { - printf("block start: %08x\n", pc + sizeof(UMM_POISONED_BLOCK_LEN_TYPE) + UMM_POISON_SIZE_BEFORE); + DBGLOG_FUNCTION("block start: %08x\n", pc + sizeof(UMM_POISONED_BLOCK_LEN_TYPE) + UMM_POISON_SIZE_BEFORE); UMM_HEAP_CORRUPTION_CB(); ok = 0; goto clean; diff --git a/cores/esp8266/umm_malloc/umm_malloc_cfg.h b/cores/esp8266/umm_malloc/umm_malloc_cfg.h index ff39648119..39d5897cc9 100644 --- a/cores/esp8266/umm_malloc/umm_malloc_cfg.h +++ b/cores/esp8266/umm_malloc/umm_malloc_cfg.h @@ -18,6 +18,17 @@ extern "C" { #include "umm_performance.h" #include "umm_stats.h" +#undef DBGLOG_FUNCTION +#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) +int _isr_safe_printf_P(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +// Note, _isr_safe_printf_P will not handle additional string arguments in +// PROGMEM. Only the 1st parameter, fmt, is supported in PROGMEM. +#define DBGLOG_FUNCTION(fmt, ...) _isr_safe_printf_P(PSTR(fmt), ##__VA_ARGS__) +#else +// Macro to place constant strings into PROGMEM and print them properly +#define DBGLOG_FUNCTION(fmt, ...) printf(PSTR(fmt), ## __VA_ARGS__ ) +#endif + /* * There are a number of defines you can set at compile time that affect how * the memory allocator will operate. diff --git a/tools/sdk/include/ets_sys.h b/tools/sdk/include/ets_sys.h index 731e562b4f..da1f3897a4 100644 --- a/tools/sdk/include/ets_sys.h +++ b/tools/sdk/include/ets_sys.h @@ -190,6 +190,7 @@ void *pvPortMalloc(size_t xWantedSize, const char* file, int line) __attribute__ void *pvPortRealloc(void* ptr, size_t xWantedSize, const char* file, int line) __attribute__((alloc_size(2))); void vPortFree(void *ptr, const char* file, int line); void *ets_memcpy(void *dest, const void *src, size_t n); +void *ets_memmove(void *dest, const void *src, size_t n); void *ets_memset(void *s, int c, size_t n); void ets_timer_arm_new(ETSTimer *a, int b, int c, int isMstimer); void ets_timer_setfn(ETSTimer *t, ETSTimerFunc *fn, void *parg); From 5d1a95f0c18f7e3f13d5ecb1f588727a71b82dbc Mon Sep 17 00:00:00 2001 From: Develo Date: Sun, 18 Aug 2019 13:10:55 -0400 Subject: [PATCH 17/17] Update umm_malloc.cpp Fix comment --- cores/esp8266/umm_malloc/umm_malloc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index ac736e840d..13930a828e 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -1769,7 +1769,7 @@ static void *_umm_realloc( void *ptr, size_t size ) { UMM_CRITICAL_ENTRY(id_realloc); } - // Now calculate the block size again...and we'll have three cases / + /* Now calculate the block size again...and we'll have three cases */ blockSize = (UMM_NBLOCK(c) - c);