diff --git a/cores/esp8266/Arduino.h b/cores/esp8266/Arduino.h index 7711e8fa58..518232ac6a 100644 --- a/cores/esp8266/Arduino.h +++ b/cores/esp8266/Arduino.h @@ -142,30 +142,9 @@ void timer0_detachInterrupt(void); void ets_intr_lock(); void ets_intr_unlock(); -#ifndef __STRINGIFY -#define __STRINGIFY(a) #a -#endif - -// these low level routines provide a replacement for SREG interrupt save that AVR uses -// but are esp8266 specific. A normal use pattern is like -// -//{ -// uint32_t savedPS = xt_rsil(1); // this routine will allow level 2 and above -// // do work here -// xt_wsr_ps(savedPS); // restore the state -//} -// -// level (0-15), interrupts of the given level and above will be active -// level 15 will disable ALL interrupts, -// level 0 will enable ALL interrupts, -// -#define xt_rsil(level) (__extension__({uint32_t state; __asm__ __volatile__("rsil %0," __STRINGIFY(level) : "=a" (state) :: "memory"); state;})) -#define xt_wsr_ps(state) __asm__ __volatile__("wsr %0,ps; isync" :: "a" (state) : "memory") - #define interrupts() xt_rsil(0) #define noInterrupts() xt_rsil(15) - #define clockCyclesPerMicrosecond() ( F_CPU / 1000000L ) #define clockCyclesToMicroseconds(a) ( (a) / clockCyclesPerMicrosecond() ) #define microsecondsToClockCycles(a) ( (a) * clockCyclesPerMicrosecond() ) diff --git a/cores/esp8266/Esp.h b/cores/esp8266/Esp.h index bc241eb392..46af2a8cb1 100644 --- a/cores/esp8266/Esp.h +++ b/cores/esp8266/Esp.h @@ -209,9 +209,7 @@ class EspClass { #ifndef CORE_MOCK uint32_t EspClass::getCycleCount() { - uint32_t ccount; - __asm__ __volatile__("esync; rsr %0,ccount":"=a" (ccount)); - return ccount; + return esp_get_cycle_count(); } #endif // !defined(CORE_MOCK) diff --git a/cores/esp8266/core_esp8266_features.h b/cores/esp8266/core_esp8266_features.h index aff8fa4de9..3b4289006a 100644 --- a/cores/esp8266/core_esp8266_features.h +++ b/cores/esp8266/core_esp8266_features.h @@ -33,7 +33,32 @@ #define WIFI_HAS_EVENT_CALLBACK +#ifndef __STRINGIFY +#define __STRINGIFY(a) #a +#endif +// these low level routines provide a replacement for SREG interrupt save that AVR uses +// but are esp8266 specific. A normal use pattern is like +// +//{ +// uint32_t savedPS = xt_rsil(1); // this routine will allow level 2 and above +// // do work here +// xt_wsr_ps(savedPS); // restore the state +//} +// +// level (0-15), interrupts of the given level and above will be active +// level 15 will disable ALL interrupts, +// level 0 will enable ALL interrupts, +// +#ifndef CORE_MOCK +#define xt_rsil(level) (__extension__({uint32_t state; __asm__ __volatile__("rsil %0," __STRINGIFY(level) : "=a" (state) :: "memory"); state;})) +#define xt_wsr_ps(state) __asm__ __volatile__("wsr %0,ps; isync" :: "a" (state) : "memory") -#endif +inline uint32_t esp_get_cycle_count() { + uint32_t ccount; + __asm__ __volatile__("rsr %0,ccount":"=a"(ccount)); + return ccount; +} +#endif // not CORE_MOCK +#endif diff --git a/cores/esp8266/umm_malloc/umm_malloc.cpp b/cores/esp8266/umm_malloc/umm_malloc.cpp index 09cfbdda71..13930a828e 100644 --- a/cores/esp8266/umm_malloc/umm_malloc.cpp +++ b/cores/esp8266/umm_malloc/umm_malloc.cpp @@ -501,6 +501,13 @@ extern "C" { +#undef memcpy +#undef memmove +#undef memset +#define memcpy ets_memcpy +#define memmove ets_memmove +#define memset ets_memset + // From UMM, the last caller of a malloc/realloc/calloc which failed: extern void *umm_last_fail_alloc_addr; extern int umm_last_fail_alloc_size; @@ -519,8 +526,50 @@ extern int umm_last_fail_alloc_size; # define DBG_LOG_LEVEL DBG_LOG_LEVEL #endif -// Macro to place constant strings into PROGMEM and print them properly -#define printf(fmt, ...) printf(PSTR(fmt), ## __VA_ARGS__ ) +/* +Changes for July 2019: + + Correct critical section with interrupt level preserving and nest support + alternative. Replace ets_intr_lock()/ets_intr_unlock() with uint32_t + oldValue=xt_rsil(3)/xt_wrs(oldValue). Added UMM_CRITICAL_DECL macro to define + storage for current state. Expanded UMM_CRITICAL_... to use unique + identifiers. This helpt facilitate gather function specific timing + information. + + Replace printf with something that is ROM or IRAM based so that a printf + that occurs during an ISR malloc/new does not cause a crash. To avoid any + reentry issue it should also avoid doing malloc lib calls. + + Refactor realloc to avoid memcpy/memmove while in critical section. This is + only effective when realloc is called with interrupts enabled. The copy + process alone can take over 10us (when copying more than ~498 bytes with a + 80MHz CPU clock). It would be good practice for an ISR to avoid realloc. + Note, while doing this might initially sound scary, this appears to be very + stable. It ran on my troublesome sketch for over 3 weeks until I got back from + vacation and flashed an update. Troublesome sketch - runs ESPAsyncTCP, with + modified fauxmo emulation for 10 devices. It receives lost of Network traffic + related to uPnP scans, which includes lots of TCP connects disconnects RSTs + related to uPnP discovery. + + Locking is no longer nested in realloc, due to refactoring for reduced IRQ + off time. + + I have clocked umm_info critical lock time taking as much as 180us. A common + use for the umm_info call is to get the free heap result. It is common + to try and closely monitor free heap as a method to detect memory leaks. + This may result in frequent calls to umm_info. There has not been a clear + test case that shows an issue yet; however, I and others think they are or + have had crashes related to this. + + I have added code that updates a current free heap value from _umm_malloc, + _umm_realloc, and _umm_free. Removing the need to do a long interrupts + disabled calculation via umm_info. + + Build optional, min/max time measurements for locks held while in info, + malloc, realloc, and free. Also, maintains a count of how many times each is + called with INTLEVEL set. + + */ /* -- dbglog {{{ */ @@ -562,42 +611,42 @@ extern int umm_last_fail_alloc_size; /* ------------------------------------------------------------------------- */ #if DBG_LOG_LEVEL >= 6 -# define DBG_LOG_TRACE( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_TRACE( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_TRACE( format, ... ) #endif #if DBG_LOG_LEVEL >= 5 -# define DBG_LOG_DEBUG( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_DEBUG( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_DEBUG( format, ... ) #endif #if DBG_LOG_LEVEL >= 4 -# define DBG_LOG_CRITICAL( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_CRITICAL( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_CRITICAL( format, ... ) #endif #if DBG_LOG_LEVEL >= 3 -# define DBG_LOG_ERROR( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_ERROR( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_ERROR( format, ... ) #endif #if DBG_LOG_LEVEL >= 2 -# define DBG_LOG_WARNING( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_WARNING( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_WARNING( format, ... ) #endif #if DBG_LOG_LEVEL >= 1 -# define DBG_LOG_INFO( format, ... ) printf( format, ## __VA_ARGS__ ) +# define DBG_LOG_INFO( format, ... ) DBGLOG_FUNCTION( format, ## __VA_ARGS__ ) #else # define DBG_LOG_INFO( format, ... ) #endif -#define DBG_LOG_FORCE( force, format, ... ) {if(force) {printf( format, ## __VA_ARGS__ );}} +#define DBG_LOG_FORCE( force, format, ... ) {if(force) {DBGLOG_FUNCTION( format, ## __VA_ARGS__ );}} /* }}} */ @@ -646,6 +695,10 @@ unsigned short int umm_numblocks = 0; #define UMM_PFREE(b) (UMM_BLOCK(b).body.free.prev) #define UMM_DATA(b) (UMM_BLOCK(b).body.data) +/* + * This does not look safe, no access locks. It currently is not being + * built, so not an immediate issue. - 06/10/19 + */ /* integrity check (UMM_INTEGRITY_CHECK) {{{ */ #if defined(UMM_INTEGRITY_CHECK) /* @@ -686,7 +739,7 @@ static int integrity_check(void) { /* Check that next free block number is valid */ if (cur >= UMM_NUMBLOCKS) { - printf("heap integrity broken: too large next free num: %d " + DBGLOG_FUNCTION("heap integrity broken: too large next free num: %d " "(in block %d, addr 0x%lx)\n", cur, prev, (unsigned long)&UMM_NBLOCK(prev)); ok = 0; @@ -699,7 +752,7 @@ static int integrity_check(void) { /* Check if prev free block number matches */ if (UMM_PFREE(cur) != prev) { - printf("heap integrity broken: free links don't match: " + DBGLOG_FUNCTION("heap integrity broken: free links don't match: " "%d -> %d, but %d -> %d\n", prev, cur, cur, UMM_PFREE(cur)); ok = 0; @@ -718,7 +771,7 @@ static int integrity_check(void) { /* Check that next block number is valid */ if (cur >= UMM_NUMBLOCKS) { - printf("heap integrity broken: too large next block num: %d " + DBGLOG_FUNCTION("heap integrity broken: too large next block num: %d " "(in block %d, addr 0x%lx)\n", cur, prev, (unsigned long)&UMM_NBLOCK(prev)); ok = 0; @@ -733,7 +786,7 @@ static int integrity_check(void) { if ((UMM_NBLOCK(cur) & UMM_FREELIST_MASK) != (UMM_PBLOCK(cur) & UMM_FREELIST_MASK)) { - printf("heap integrity broken: mask wrong at addr 0x%lx: n=0x%x, p=0x%x\n", + DBGLOG_FUNCTION("heap integrity broken: mask wrong at addr 0x%lx: n=0x%x, p=0x%x\n", (unsigned long)&UMM_NBLOCK(cur), (UMM_NBLOCK(cur) & UMM_FREELIST_MASK), (UMM_PBLOCK(cur) & UMM_FREELIST_MASK) @@ -747,7 +800,7 @@ static int integrity_check(void) { /* Check if prev block number matches */ if (UMM_PBLOCK(cur) != prev) { - printf("heap integrity broken: block links don't match: " + DBGLOG_FUNCTION("heap integrity broken: block links don't match: " "%d -> %d, but %d -> %d\n", prev, cur, cur, UMM_PBLOCK(cur)); ok = 0; @@ -793,7 +846,7 @@ static int integrity_check(void) { */ static void dump_mem ( const unsigned char *ptr, size_t len ) { while (len--) { - printf(" 0x%.2x", (unsigned int)(*ptr++)); + DBGLOG_FUNCTION(" 0x%.2x", (unsigned int)(*ptr++)); } } @@ -824,11 +877,11 @@ static int check_poison( const unsigned char *ptr, size_t poison_size, } if (!ok) { - printf("there is no poison %s the block. " + DBGLOG_FUNCTION("there is no poison %s the block. " "Expected poison address: 0x%lx, actual data:", where, (unsigned long)ptr); dump_mem(ptr, poison_size); - printf("\n"); + DBGLOG_FUNCTION("\n"); } return ok; @@ -842,7 +895,7 @@ static int check_poison_block( umm_block *pblock ) { int ok = 1; if (pblock->header.used.next & UMM_FREELIST_MASK) { - printf("check_poison_block is called for free block 0x%lx\n", + DBGLOG_FUNCTION("check_poison_block is called for free block 0x%lx\n", (unsigned long)pblock); } else { /* the block is used; let's check poison */ @@ -851,7 +904,7 @@ static int check_poison_block( umm_block *pblock ) { pc_cur = pc + sizeof(UMM_POISONED_BLOCK_LEN_TYPE); if (!check_poison(pc_cur, UMM_POISON_SIZE_BEFORE, "before")) { - printf("block start: %08x\n", pc + sizeof(UMM_POISONED_BLOCK_LEN_TYPE) + UMM_POISON_SIZE_BEFORE); + DBGLOG_FUNCTION("block start: %08x\n", pc + sizeof(UMM_POISONED_BLOCK_LEN_TYPE) + UMM_POISON_SIZE_BEFORE); UMM_HEAP_CORRUPTION_CB(); ok = 0; goto clean; @@ -859,7 +912,7 @@ static int check_poison_block( umm_block *pblock ) { pc_cur = pc + *((UMM_POISONED_BLOCK_LEN_TYPE *)pc) - UMM_POISON_SIZE_AFTER; if (!check_poison(pc_cur, UMM_POISON_SIZE_AFTER, "after")) { - printf("block start: %08x\n", pc + sizeof(UMM_POISONED_BLOCK_LEN_TYPE) + UMM_POISON_SIZE_BEFORE); + DBGLOG_FUNCTION("block start: %08x\n", pc + sizeof(UMM_POISONED_BLOCK_LEN_TYPE) + UMM_POISON_SIZE_BEFORE); UMM_HEAP_CORRUPTION_CB(); ok = 0; goto clean; @@ -982,6 +1035,7 @@ static void *get_unpoisoned( void *vptr ) { UMM_HEAP_INFO ummHeapInfo; void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { + UMM_CRITICAL_DECL(id_info); unsigned short int blockNo = 0; @@ -990,7 +1044,7 @@ void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { } /* Protect the critical section... */ - UMM_CRITICAL_ENTRY(); + UMM_CRITICAL_ENTRY(id_info); /* * Clear out all of the entries in the ummHeapInfo structure before doing @@ -1051,7 +1105,7 @@ void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { if( ptr == &UMM_BLOCK(blockNo) ) { /* Release the critical section... */ - UMM_CRITICAL_EXIT(); + UMM_CRITICAL_EXIT(id_info); return( ptr ); } @@ -1104,8 +1158,23 @@ void ICACHE_FLASH_ATTR *umm_info( void *ptr, int force ) { ummHeapInfo.usedBlocks, ummHeapInfo.freeBlocks ); + if (ummHeapInfo.freeBlocks == ummStats.free_blocks) { + DBG_LOG_FORCE( force, "\nheap info Free blocks and heap statistics Free blocks match.\n"); + } else { + DBG_LOG_FORCE( force, "\nheap info Free blocks %5d != heap statistics Free Blocks %5d\n\n", + ummHeapInfo.freeBlocks, + ummStats.free_blocks ); + } + + DBG_LOG_FORCE( force, "\numm heap statistics:\n"); + DBG_LOG_FORCE( force, " Free Space %5u\n", ummStats.free_blocks * sizeof(umm_block)); + DBG_LOG_FORCE( force, " Low Watermark %5u\n", ummStats.free_blocks_min * sizeof(umm_block)); + DBG_LOG_FORCE( force, " MAX Alloc Request %5u\n", ummStats.alloc_max_size); + DBG_LOG_FORCE( force, " OOM Count %5u\n", ummStats.oom_count); + DBG_LOG_FORCE( force, " Size of umm_block %5u\n", sizeof(umm_block)); + /* Release the critical section... */ - UMM_CRITICAL_EXIT(); + UMM_CRITICAL_EXIT(id_info); return( NULL ); } @@ -1223,6 +1292,9 @@ void ICACHE_FLASH_ATTR umm_init( void ) { /* index of the latest `umm_block` */ const unsigned short int block_last = UMM_NUMBLOCKS - 1; + /* init ummStats */ + ummStats.free_blocks = ummStats.free_blocks_min = block_last; + /* setup the 0th `umm_block`, which just points to the 1st */ UMM_NBLOCK(block_0th) = block_1th; UMM_NFREE(block_0th) = block_1th; @@ -1266,6 +1338,7 @@ void ICACHE_FLASH_ATTR umm_init( void ) { /* ------------------------------------------------------------------------ */ static void _umm_free( void *ptr ) { + UMM_CRITICAL_DECL(id_free); unsigned short int c; @@ -1286,15 +1359,18 @@ static void _umm_free( void *ptr ) { * on the free list! */ - /* Protect the critical section... */ - UMM_CRITICAL_ENTRY(); - /* Figure out which block we're in. Note the use of truncated division... */ c = (((char *)ptr)-(char *)(&(umm_heap[0])))/sizeof(umm_block); DBG_LOG_DEBUG( "Freeing block %6d\n", c ); + /* Protect the critical section... */ + UMM_CRITICAL_ENTRY(id_free); + + /* Update heap statistics */ + ummStats.free_blocks += (UMM_NBLOCK(c) - c); + /* Now let's assimilate this block with the next one if possible. */ umm_assimilate_up( c ); @@ -1346,12 +1422,14 @@ static void _umm_free( void *ptr ) { #endif /* Release the critical section... */ - UMM_CRITICAL_EXIT(); + UMM_CRITICAL_EXIT(id_free); } /* ------------------------------------------------------------------------ */ static void *_umm_malloc( size_t size ) { + UMM_CRITICAL_DECL(id_malloc); + unsigned short int blocks; unsigned short int blockSize = 0; @@ -1377,8 +1455,8 @@ static void *_umm_malloc( size_t size ) { return( (void *)NULL ); } - /* Protect the critical section... */ - UMM_CRITICAL_ENTRY(); + if ( size > ummStats.alloc_max_size ) + ummStats.alloc_max_size = size; blocks = umm_blocks( size ); @@ -1390,6 +1468,9 @@ static void *_umm_malloc( size_t size ) { * algorithm */ + /* Protect the critical section... */ + UMM_CRITICAL_ENTRY(id_malloc); + cf = UMM_NFREE(0); bestBlock = UMM_NFREE(0); @@ -1462,19 +1543,27 @@ static void *_umm_malloc( size_t size ) { UMM_PFREE( UMM_NFREE(cf) ) = cf + blocks; UMM_NFREE( cf + blocks ) = UMM_NFREE(cf); } + + /* Update heap statistics */ + ummStats.free_blocks -= blocks; + if (ummStats.free_blocks < ummStats.free_blocks_min) + ummStats.free_blocks_min = ummStats.free_blocks; + } else { + ummStats.oom_count += 1; + + /* Release the critical section... */ + UMM_CRITICAL_EXIT(id_malloc); + /* Out of memory */ DBG_LOG_DEBUG( "Can't allocate %5d blocks\n", blocks ); - /* Release the critical section... */ - UMM_CRITICAL_EXIT(); - return( (void *)NULL ); } /* Release the critical section... */ - UMM_CRITICAL_EXIT(); + UMM_CRITICAL_EXIT(id_malloc); return( (void *)&UMM_DATA(cf) ); } @@ -1482,10 +1571,10 @@ static void *_umm_malloc( size_t size ) { /* ------------------------------------------------------------------------ */ static void *_umm_realloc( void *ptr, size_t size ) { + UMM_CRITICAL_DECL(id_realloc); unsigned short int blocks; unsigned short int blockSize; - unsigned short int c; size_t curSize; @@ -1522,8 +1611,33 @@ static void *_umm_realloc( void *ptr, size_t size ) { return( (void *)NULL ); } - /* Protect the critical section... */ - UMM_CRITICAL_ENTRY(); + if ( size > ummStats.alloc_max_size ) + ummStats.alloc_max_size = size; + + /* + * Defer starting critical section. + * + * Initially we should be safe without a critical section as long as we are + * referencing values that are within our allocation as constants. + * And only reference values that will not change, while the redefintions of + * the allocations around us change. + * + * Example UMM_PBLOCK() could be change by a call to malloc from an ISR. + * On the other hand UMM_NBLOCK() is safe returns an address of the next + * block. The calculation is all based on information within our allocation + * that remains constant, until we change it. + * + * As long as we don't try to modify the next block or walk the chain of + * blocks we are okay. + * + * When called by an "interrupts enabled" type caller, it bears the + * responsibility to not call again, with the allocate we are currently + * working on. I think this is a normal expectation. I could be wrong. + * Such a situation would involve a function that is called from foreground + * and ISR context. Such code would already have to be re-entrant. This + * change may expand the corner cases for such a function. + * + */ /* * Otherwise we need to actually do a reallocation. A naiive approach @@ -1559,12 +1673,12 @@ static void *_umm_realloc( void *ptr, size_t size ) { DBG_LOG_DEBUG( "realloc the same size block - %d, do nothing\n", blocks ); - /* Release the critical section... */ - UMM_CRITICAL_EXIT(); - return( ptr ); } + /* Now we need a critical section... */ + UMM_CRITICAL_ENTRY(id_realloc); + /* * Now we have a block size that could be bigger or smaller. Either * way, try to assimilate up to the next block before doing anything... @@ -1573,7 +1687,13 @@ static void *_umm_realloc( void *ptr, size_t size ) { * assimilation step later in free :-) */ - umm_assimilate_up( c ); + if( UMM_NBLOCK(UMM_NBLOCK(c)) & UMM_FREELIST_MASK ) { + // This will often be most of the free heap. The excess is + // restored when umm_free() is called before returning. + ummStats.free_blocks -= + (UMM_NBLOCK(UMM_NBLOCK(c)) & UMM_BLOCKNO_MASK) - UMM_NBLOCK(c); + umm_assimilate_up( c ); + } /* * Now check if it might help to assimilate down, but don't actually @@ -1589,6 +1709,17 @@ static void *_umm_realloc( void *ptr, size_t size ) { DBG_LOG_DEBUG( "realloc() could assimilate down %d blocks - fits!\n\r", c-UMM_PBLOCK(c) ); + /* + * Calculate the number of blocks to keep while the information is + * still available. + */ + + unsigned short int prevBlockSize = c - UMM_PBLOCK(c); + ummStats.free_blocks -= prevBlockSize; + unsigned short int prelimBlockSize = blockSize + prevBlockSize; + if(prelimBlockSize < blocks) + prelimBlockSize = blocks; + /* Disconnect the previous block from the FREE list */ umm_disconnect_from_free_list( UMM_PBLOCK(c) ); @@ -1600,6 +1731,29 @@ static void *_umm_realloc( void *ptr, size_t size ) { c = umm_assimilate_down(c, 0); + /* + * Currently all or most of the heap has been grabbed. Do an early split of + * allocation down to the amount needed to do a successful move operation. + * This will allow an alloc/new from a ISR to succeed while a memmove is + * running. + */ + if( (UMM_NBLOCK(c) - c) > prelimBlockSize ) { + umm_make_new_block( c, prelimBlockSize, 0, 0 ); + _umm_free( (void *)&UMM_DATA(c+prelimBlockSize) ); + } + + // This is the lowest low that may be seen by an ISR doing an alloc/new + if( ummStats.free_blocks < ummStats.free_blocks_min ) + ummStats.free_blocks_min = ummStats.free_blocks; + + /* + * For the ESP8266 interrupts should not be off for more than 10us. + * An unprotect/protect around memmove should be safe to do here. + * All variables used are on the stack. + */ + + UMM_CRITICAL_EXIT(id_realloc); + /* * Move the bytes down to the new block we just created, but be sure to move * only the original bytes. @@ -1610,6 +1764,9 @@ static void *_umm_realloc( void *ptr, size_t size ) { /* And don't forget to adjust the pointer to the new block location! */ ptr = (void *)&UMM_DATA(c); + + /* Now resume critical section... */ + UMM_CRITICAL_ENTRY(id_realloc); } /* Now calculate the block size again...and we'll have three cases */ @@ -1634,6 +1791,9 @@ static void *_umm_realloc( void *ptr, size_t size ) { } else { /* New block is bigger than the old block... */ + /* Finish up without critical section */ + UMM_CRITICAL_EXIT(id_realloc); + void *oldptr = ptr; DBG_LOG_DEBUG( "realloc %d to a bigger block %d, make new, copy, and free the old\n", blockSize, blocks ); @@ -1646,12 +1806,18 @@ static void *_umm_realloc( void *ptr, size_t size ) { if( (ptr = _umm_malloc( size )) ) { memcpy( ptr, oldptr, curSize ); _umm_free( oldptr ); + } else { + ummStats.oom_count += 1; // Needs atomic } + return( ptr ); } + if (ummStats.free_blocks < ummStats.free_blocks_min) + ummStats.free_blocks_min = ummStats.free_blocks; + /* Release the critical section... */ - UMM_CRITICAL_EXIT(); + UMM_CRITICAL_EXIT(id_realloc); return( ptr ); } @@ -1766,8 +1932,16 @@ void umm_free( void *ptr ) { /* ------------------------------------------------------------------------ */ size_t ICACHE_FLASH_ATTR umm_free_heap_size( void ) { - umm_info(NULL, 0); - return (size_t)ummHeapInfo.freeBlocks * sizeof(umm_block); + return (size_t)ummStats.free_blocks * sizeof(umm_block); +} + +size_t ICACHE_FLASH_ATTR umm_free_heap_size_min( void ) { + return (size_t)ummStats.free_blocks_min * sizeof(umm_block); +} + +size_t ICACHE_FLASH_ATTR umm_free_heap_size_min_reset( void ) { + ummStats.free_blocks_min = ummStats.free_blocks; + return (size_t)ummStats.free_blocks_min * sizeof(umm_block); } size_t ICACHE_FLASH_ATTR umm_max_block_size( void ) { diff --git a/cores/esp8266/umm_malloc/umm_malloc_cfg.h b/cores/esp8266/umm_malloc/umm_malloc_cfg.h index 14fb4a8e48..39d5897cc9 100644 --- a/cores/esp8266/umm_malloc/umm_malloc_cfg.h +++ b/cores/esp8266/umm_malloc/umm_malloc_cfg.h @@ -10,10 +10,25 @@ extern "C" { #endif +#include #include #include #include "c_types.h" +#include "umm_performance.h" +#include "umm_stats.h" + +#undef DBGLOG_FUNCTION +#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) +int _isr_safe_printf_P(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +// Note, _isr_safe_printf_P will not handle additional string arguments in +// PROGMEM. Only the 1st parameter, fmt, is supported in PROGMEM. +#define DBGLOG_FUNCTION(fmt, ...) _isr_safe_printf_P(PSTR(fmt), ##__VA_ARGS__) +#else +// Macro to place constant strings into PROGMEM and print them properly +#define DBGLOG_FUNCTION(fmt, ...) printf(PSTR(fmt), ## __VA_ARGS__ ) +#endif + /* * There are a number of defines you can set at compile time that affect how * the memory allocator will operate. @@ -110,8 +125,22 @@ extern char _heap_start[]; * called from within umm_malloc() */ -#define UMM_CRITICAL_ENTRY() ets_intr_lock() -#define UMM_CRITICAL_EXIT() ets_intr_unlock() + +#if defined(UMM_CRITICAL_PERIOD_ANALYZE) + +#define UMM_CRITICAL_DECL(tag) uint32_t _saved_ps_##tag +#define UMM_CRITICAL_ENTRY(tag) _critical_entry(&time_stats.tag, &_saved_ps_##tag) +#define UMM_CRITICAL_EXIT(tag) _critical_exit(&time_stats.tag, &_saved_ps_##tag) + +#else + +// This method preserves the intlevel on entry and restores the +// original intlevel at exit. +#define UMM_CRITICAL_DECL(tag) uint32_t _saved_ps_##tag +#define UMM_CRITICAL_ENTRY(tag) _saved_ps_##tag = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL) +#define UMM_CRITICAL_EXIT(tag) xt_wsr_ps(_saved_ps_##tag) + +#endif /* * -D UMM_INTEGRITY_CHECK : diff --git a/cores/esp8266/umm_malloc/umm_performance.cpp b/cores/esp8266/umm_malloc/umm_performance.cpp new file mode 100644 index 0000000000..4b9ec4e6b3 --- /dev/null +++ b/cores/esp8266/umm_malloc/umm_performance.cpp @@ -0,0 +1,81 @@ +/* + * umm_malloc performance measurments and ESP specifics + */ + +#include +#include +#include +#include +#include "umm_performance.h" +#include "umm_stats.h" + +extern "C" { + +UMM_STATS ummStats = {0, 0, 0, 0}; + +#ifdef UMM_CRITICAL_PERIOD_ANALYZE +struct _UMM_TIME_STATS time_stats = { + {0xFFFFFFFF, 0U, 0U, 0U}, + {0xFFFFFFFF, 0U, 0U, 0U}, + {0xFFFFFFFF, 0U, 0U, 0U}, + {0xFFFFFFFF, 0U, 0U, 0U} }; + +bool ICACHE_FLASH_ATTR get_umm_get_perf_data(struct _UMM_TIME_STATS *p, size_t size) { + if (p && sizeof(time_stats) == size) { + uint32_t save_ps = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL); + memcpy(p, &time_stats, size); + xt_wsr_ps(save_ps); + return true; + } + return false; +} +#endif + +#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) +/* + Printing from the malloc routines is tricky. Since a lot of library calls + will want to do malloc. + + Objective: To be able to print "last gasp" diagnostic messages + when interrupts are disabled and w/o availability of heap resources. +*/ + +// ROM _putc1, ignores CRs and sends CR/LF for LF, newline. +// Always returns character sent. +int constexpr (*_rom_putc1)(int) = (int (*)(int))0x40001dcc; +void uart_buff_switch(uint8_t); + +int _isr_safe_printf_P(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +int ICACHE_RAM_ATTR _isr_safe_printf_P(const char *fmt, ...) { +#ifdef DEBUG_ESP_PORT +#define VALUE(x) __STRINGIFY(x) + // Preprocessor and compiler together will optimize away the if. + if (strcmp("Serial1", VALUE(DEBUG_ESP_PORT)) == 0) { + uart_buff_switch(1U); + } else { + uart_buff_switch(0U); + } +#else + uart_buff_switch(0U); // Side effect, clears RX FIFO +#endif + /* + To use ets_strlen() and ets_memcpy() safely with PROGMEM, flash storage, + the PROGMEM address must be word (4 bytes) aligned. The destination + address for ets_memcpy must also be word-aligned. We also round the + buf_len up to the nearest word boundary. So that all transfers will be + whole words. + */ + size_t str_len = ets_strlen(fmt); + size_t buf_len = (str_len + 1 + 3) & ~0x03U; + char ram_buf[buf_len] __attribute__ ((aligned(4))); + ets_memcpy(ram_buf, fmt, buf_len); + va_list argPtr; + va_start(argPtr, fmt); + int result = ets_vprintf(_rom_putc1, ram_buf, argPtr); + va_end(argPtr); + return result; +} + +#endif + +}; diff --git a/cores/esp8266/umm_malloc/umm_performance.h b/cores/esp8266/umm_malloc/umm_performance.h new file mode 100644 index 0000000000..5d6ae24c61 --- /dev/null +++ b/cores/esp8266/umm_malloc/umm_performance.h @@ -0,0 +1,85 @@ +/* + * umm_malloc performance measurments and ESP specifics + */ + +#ifndef _UMM_PERFORMANCE_H +#define _UMM_PERFORMANCE_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * -D UMM_CRITICAL_PERIOD_ANALYZE : + * + * Build option to collect timing usage data on critical section usage in + * functions: info, malloc, realloc. Collects MIN, MAX, and number of time + * IRQs were disabled at request time. Note, for realloc MAX disabled time + * will not include the time from calling malloc and/or free. + * Examine code for specifics on what info is available and how to access. +*/ +// #define UMM_CRITICAL_PERIOD_ANALYZE + + +/* + Per Devyte, the core currently doesn't support masking a specific interrupt + level. That doesn't mean it can't be implemented, only that at this time + locking is implemented as all or nothing. + https://github.com/esp8266/Arduino/issues/6246#issuecomment-508612609 + + So for now we default to all, 15. + */ +#ifndef DEFAULT_CRITICAL_SECTION_INTLEVEL +#define DEFAULT_CRITICAL_SECTION_INTLEVEL 15 +#endif + +#if defined(UMM_CRITICAL_PERIOD_ANALYZE) +// This option adds support for gathering time locked data +typedef struct _TIME_STAT { + uint32_t min; + uint32_t max; + uint32_t start; + uint32_t intlevel; +} time_stat_t; + +struct _UMM_TIME_STATS { + time_stat_t id_malloc; + time_stat_t id_realloc; + time_stat_t id_free; + time_stat_t id_info; +}; + +extern struct _UMM_TIME_STATS time_stats; + +bool get_umm_get_perf_data(struct _UMM_TIME_STATS *p, size_t size); + +static inline void _critical_entry(time_stat_t *p, uint32_t *saved_ps) { + *saved_ps = xt_rsil(DEFAULT_CRITICAL_SECTION_INTLEVEL); + if (0U != (*saved_ps & 0x0FU)) { + p->intlevel += 1U; + } + + p->start = esp_get_cycle_count(); +} + +static inline void _critical_exit(time_stat_t *p, uint32_t *saved_ps) { + uint32_t elapse = esp_get_cycle_count() - p->start; + if (elapse < p->min) + p->min = elapse; + + if (elapse > p->max) + p->max = elapse; + + xt_wsr_ps(*saved_ps); +} +#endif + +#if defined(DEBUG_ESP_PORT) || defined(DEBUG_ESP_ISR) +int _isr_safe_printf_P(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _UMM_PERFORMANCE_H */ diff --git a/cores/esp8266/umm_malloc/umm_stats.h b/cores/esp8266/umm_malloc/umm_stats.h new file mode 100644 index 0000000000..f2aae9875f --- /dev/null +++ b/cores/esp8266/umm_malloc/umm_stats.h @@ -0,0 +1,36 @@ +/* + * umm_malloc heap statistics + */ + +#ifndef _UMM_STATS_H +#define _UMM_STATS_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct UMM_STATS_t { + unsigned short int free_blocks; + unsigned short int free_blocks_min; + size_t alloc_max_size; + size_t oom_count; +} UMM_STATS; +extern UMM_STATS ummStats; + +size_t ICACHE_FLASH_ATTR umm_free_heap_size_min( void ); +size_t ICACHE_FLASH_ATTR umm_free_heap_size_min_reset( void ); + +inline size_t umm_get_max_alloc_size( void ) { + return ummStats.alloc_max_size; +} + +inline size_t umm_get_oom_count( void ) { + return ummStats.oom_count; +} + + +#ifdef __cplusplus +} +#endif + +#endif /* _UMM_STATS_H */ diff --git a/tools/sdk/include/ets_sys.h b/tools/sdk/include/ets_sys.h index 731e562b4f..da1f3897a4 100644 --- a/tools/sdk/include/ets_sys.h +++ b/tools/sdk/include/ets_sys.h @@ -190,6 +190,7 @@ void *pvPortMalloc(size_t xWantedSize, const char* file, int line) __attribute__ void *pvPortRealloc(void* ptr, size_t xWantedSize, const char* file, int line) __attribute__((alloc_size(2))); void vPortFree(void *ptr, const char* file, int line); void *ets_memcpy(void *dest, const void *src, size_t n); +void *ets_memmove(void *dest, const void *src, size_t n); void *ets_memset(void *s, int c, size_t n); void ets_timer_arm_new(ETSTimer *a, int b, int c, int isMstimer); void ets_timer_setfn(ETSTimer *t, ETSTimerFunc *fn, void *parg);