From 65605968646032e89260ee7dd006d7fc1eeeb821 Mon Sep 17 00:00:00 2001 From: Scott Shawcroft Date: Tue, 23 Jan 2018 15:58:05 -0800 Subject: [PATCH 1/5] Switch to m_malloc_maybe and m_free to reduce our dependence on gc_alloc. gc_alloc's API is changing and we shouldn't need to care about it. So, we switch to m_malloc which has the default behavior we expect. --- ports/atmel-samd/spi_flash.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ports/atmel-samd/spi_flash.c b/ports/atmel-samd/spi_flash.c index a64c0ed331f8f..1070481ea830e 100644 --- a/ports/atmel-samd/spi_flash.c +++ b/ports/atmel-samd/spi_flash.c @@ -30,7 +30,7 @@ #include "extmod/vfs.h" #include "extmod/vfs_fat.h" -#include "py/gc.h" +#include "py/misc.h" #include "py/obj.h" #include "py/runtime.h" #include "lib/oofatfs/ff.h" @@ -256,12 +256,12 @@ void spi_flash_init(void) { if (spi_flash_is_initialised) { return; } - + samd_peripherals_sercom_clock_init(SPI_FLASH_SERCOM, SPI_FLASH_SERCOM_INDEX); // Set up with defaults, then change. spi_m_sync_init(&spi_flash_desc, SPI_FLASH_SERCOM); - + hri_sercomspi_write_CTRLA_DOPO_bf(SPI_FLASH_SERCOM, SPI_FLASH_DOPO); hri_sercomspi_write_CTRLA_DIPO_bf(SPI_FLASH_SERCOM, SPI_FLASH_DIPO); @@ -398,7 +398,7 @@ static bool flush_scratch_flash(void) { static bool allocate_ram_cache(void) { uint8_t blocks_per_sector = SPI_FLASH_ERASE_SIZE / FILESYSTEM_BLOCK_SIZE; uint8_t pages_per_block = FILESYSTEM_BLOCK_SIZE / SPI_FLASH_PAGE_SIZE; - MP_STATE_VM(flash_ram_cache) = gc_alloc(blocks_per_sector * pages_per_block * sizeof(uint32_t), false); + MP_STATE_VM(flash_ram_cache) = m_malloc_maybe(blocks_per_sector * pages_per_block * sizeof(uint32_t), false); if (MP_STATE_VM(flash_ram_cache) == NULL) { return false; } @@ -409,7 +409,7 @@ static bool allocate_ram_cache(void) { bool success = true; for (i = 0; i < blocks_per_sector; i++) { for (j = 0; j < pages_per_block; j++) { - uint8_t *page_cache = gc_alloc(SPI_FLASH_PAGE_SIZE, false); + uint8_t *page_cache = m_malloc_maybe(SPI_FLASH_PAGE_SIZE, false); if (page_cache == NULL) { success = false; break; @@ -427,11 +427,11 @@ static bool allocate_ram_cache(void) { i++; for (; i > 0; i--) { for (; j > 0; j--) { - gc_free(MP_STATE_VM(flash_ram_cache)[(i - 1) * pages_per_block + (j - 1)]); + m_free(MP_STATE_VM(flash_ram_cache)[(i - 1) * pages_per_block + (j - 1)]); } j = pages_per_block; } - gc_free(MP_STATE_VM(flash_ram_cache)); + m_free(MP_STATE_VM(flash_ram_cache)); MP_STATE_VM(flash_ram_cache) = NULL; } return success; @@ -474,13 +474,13 @@ static bool flush_ram_cache(bool keep_cache) { MP_STATE_VM(flash_ram_cache)[i * pages_per_block + j], SPI_FLASH_PAGE_SIZE); if (!keep_cache) { - gc_free(MP_STATE_VM(flash_ram_cache)[i * pages_per_block + j]); + m_free(MP_STATE_VM(flash_ram_cache)[i * pages_per_block + j]); } } } // We're done with the cache for now so give it back. if (!keep_cache) { - gc_free(MP_STATE_VM(flash_ram_cache)); + m_free(MP_STATE_VM(flash_ram_cache)); MP_STATE_VM(flash_ram_cache) = NULL; } return true; From 56bd0789af7b11bcc15dc901745f61b83b1f7ff3 Mon Sep 17 00:00:00 2001 From: Scott Shawcroft Date: Tue, 23 Jan 2018 15:59:26 -0800 Subject: [PATCH 2/5] Speed up qstr loading by using the stack to store a temporary string instead of the heap. 
--- py/persistentcode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/py/persistentcode.c b/py/persistentcode.c index 477ba7501c34c..ba126a3fb254c 100644 --- a/py/persistentcode.c +++ b/py/persistentcode.c @@ -124,10 +124,9 @@ STATIC size_t read_uint(mp_reader_t *reader) { STATIC qstr load_qstr(mp_reader_t *reader) { size_t len = read_uint(reader); - char *str = m_new(char, len); + char str[len]; read_bytes(reader, (byte*)str, len); qstr qst = qstr_from_strn(str, len); - m_del(char, str, len); return qst; } From 416abe33ed44e44d8f75342731080217e123278b Mon Sep 17 00:00:00 2001 From: Scott Shawcroft Date: Tue, 23 Jan 2018 16:22:05 -0800 Subject: [PATCH 3/5] Introduce a long lived section of the heap. This adapts the allocation process to start from either end of the heap when searching for free space. The default behavior is identical to the existing behavior where it starts with the lowest block and looks higher. Now it can also look from the highest block and lower depending on the long_lived parameter to gc_alloc. As the heap fills, the two sections may overlap. When they overlap, a collect may be triggered in order to keep the long lived section compact. However, free space is always eligible for each type of allocation. By starting from either end of the heap, we have the ability to separate short lived objects from long lived ones. This separation reduces heap fragmentation because long lived objects are easy to densely pack. Most objects are short lived initially but may be made long lived when they are referenced by a type or module. This involves copying the memory and then letting the collect phase free the old portion. QSTR pools and chunks are always long lived because they are never freed. The reallocation, collection and free processes are largely unchanged. They simply also maintain an index to the highest free block as well as the lowest. These indices are used to speed up the allocation search until the next collect. In practice, this change may slightly slow down import statements with the benefit that memory is much less fragmented afterwards. For example, a test import into a 20k heap that leaves ~6k free previously had a largest contiguous free space of ~400 bytes. After this change, the largest contiguous free space is over 3400 bytes.
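
To make the two-ended search concrete, here is a minimal Python sketch of the idea described above. It is an illustration only, not MicroPython code: the invented BlockMap class stands in for the real 2-bits-per-block allocation table, and collection triggers, finalisers, and the crossover check are omitted.

# Toy model of the two-ended allocation strategy: short-lived allocations
# scan upward from the lowest free block, long-lived allocations scan
# downward from the highest, so long-lived data packs densely at the top.
class BlockMap:
    def __init__(self, n_blocks):
        self.used = [False] * n_blocks  # True means the block is allocated

    def _find_run(self, n, order):
        run = []
        for i in order:
            run = run + [i] if not self.used[i] else []
            if len(run) == n:
                return run
        return None

    def alloc(self, n_blocks, long_lived=False):
        # Short-lived: scan low -> high. Long-lived: scan high -> low.
        order = range(len(self.used))
        if long_lived:
            order = reversed(order)
        run = self._find_run(n_blocks, order)
        if run is None:
            return None  # the real allocator would gc_collect() and retry here
        for i in run:
            self.used[i] = True
        return min(run)  # index of the first block of the allocation

heap = BlockMap(16)
print(heap.alloc(2))                   # -> 0: short-lived data packs at the bottom
print(heap.alloc(3, long_lived=True))  # -> 13: long-lived data packs at the top

In the patch itself, gc_first_free_atb_index and gc_last_free_atb_index are the two scan starting points, and gc_lowest_long_lived_ptr marks where the long lived region currently begins.
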
--- py/builtinimport.c | 8 ++ py/gc.c | 179 +++++++++++++++++++++++++++++++++++---------- py/gc.h | 4 +- py/gc_long_lived.c | 132 +++++++++++++++++++++++++++++++++ py/gc_long_lived.h | 43 +++++++++++ py/malloc.c | 19 +++-- py/misc.h | 21 ++++-- py/modbuiltins.c | 2 +- py/mpstate.h | 7 +- py/obj.h | 1 + py/objmodule.c | 14 +++- py/objtype.c | 7 +- py/py.mk | 1 + py/qstr.c | 9 ++- 14 files changed, 378 insertions(+), 69 deletions(-) create mode 100644 py/gc_long_lived.c create mode 100644 py/gc_long_lived.h diff --git a/py/builtinimport.c b/py/builtinimport.c index b76ea00bd8198..9f7d34dcaea95 100644 --- a/py/builtinimport.c +++ b/py/builtinimport.c @@ -30,6 +30,7 @@ #include #include "py/compile.h" +#include "py/gc_long_lived.h" #include "py/objmodule.h" #include "py/persistentcode.h" #include "py/runtime.h" @@ -144,6 +145,7 @@ STATIC void do_load_from_lexer(mp_obj_t module_obj, mp_lexer_t *lex) { // parse, compile and execute the module in its context mp_obj_dict_t *mod_globals = mp_obj_module_get_globals(module_obj); mp_parse_compile_execute(lex, MP_PARSE_FILE_INPUT, mod_globals, mod_globals); + mp_obj_module_set_globals(module_obj, make_dict_long_lived(mod_globals, 10)); } #endif @@ -173,6 +175,8 @@ STATIC void do_execute_raw_code(mp_obj_t module_obj, mp_raw_code_t *raw_code) { // finish nlr block, restore context nlr_pop(); + mp_obj_module_set_globals(module_obj, + make_dict_long_lived(mp_obj_module_get_globals(module_obj), 10)); mp_globals_set(old_globals); mp_locals_set(old_locals); } else { @@ -468,6 +472,10 @@ mp_obj_t mp_builtin___import__(size_t n_args, const mp_obj_t *args) { if (outer_module_obj != MP_OBJ_NULL) { qstr s = qstr_from_strn(mod_str + last, i - last); mp_store_attr(outer_module_obj, s, module_obj); + // The above store can cause a dictionary rehash and new allocation. So, + // lets make sure the globals dictionary is still long lived. + mp_obj_module_set_globals(outer_module_obj, + make_dict_long_lived(mp_obj_module_get_globals(outer_module_obj), 10)); } outer_module_obj = module_obj; if (top_module_obj == MP_OBJ_NULL) { diff --git a/py/gc.c b/py/gc.c index 16b6e01927c61..ffdcbe5c02566 100644 --- a/py/gc.c +++ b/py/gc.c @@ -41,6 +41,9 @@ #define DEBUG_printf(...) (void)0 #endif +// Uncomment this if you want to use a debugger to capture state at every allocation and free. +// #define LOG_HEAP_ACTIVITY 1 + // make this 1 to dump the heap each time it changes #define EXTENSIVE_HEAP_PROFILING (0) @@ -59,15 +62,6 @@ #define AT_MARK (3) #define BLOCKS_PER_ATB (4) -#define ATB_MASK_0 (0x03) -#define ATB_MASK_1 (0x0c) -#define ATB_MASK_2 (0x30) -#define ATB_MASK_3 (0xc0) - -#define ATB_0_IS_FREE(a) (((a) & ATB_MASK_0) == 0) -#define ATB_1_IS_FREE(a) (((a) & ATB_MASK_1) == 0) -#define ATB_2_IS_FREE(a) (((a) & ATB_MASK_2) == 0) -#define ATB_3_IS_FREE(a) (((a) & ATB_MASK_3) == 0) #define BLOCK_SHIFT(block) (2 * ((block) & (BLOCKS_PER_ATB - 1))) #define ATB_GET_KIND(block) ((MP_STATE_MEM(gc_alloc_table_start)[(block) / BLOCKS_PER_ATB] >> BLOCK_SHIFT(block)) & 3) @@ -152,14 +146,19 @@ void gc_init(void *start, void *end) { memset(MP_STATE_MEM(gc_finaliser_table_start), 0, gc_finaliser_table_byte_len); #endif - // set last free ATB index to start of heap + // Set first free ATB index to the start of the heap. MP_STATE_MEM(gc_last_free_atb_index) = 0; + // Set last free ATB index to the end of the heap. + MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1; + // Set the lowest long lived ptr to the end of the heap to start. 
This will be lowered as long + // lived objects are allocated. + MP_STATE_MEM(gc_lowest_long_lived_ptr) = (void*) PTR_FROM_BLOCK(MP_STATE_MEM(gc_alloc_table_byte_len * BLOCKS_PER_ATB)); // unlock the GC MP_STATE_MEM(gc_lock_depth) = 0; // allow auto collection - MP_STATE_MEM(gc_auto_collect_enabled) = 1; + MP_STATE_MEM(gc_auto_collect_enabled) = true; #if MICROPY_GC_ALLOC_THRESHOLD // by default, maxuint for gc threshold, effectively turning gc-by-threshold off @@ -288,6 +287,7 @@ STATIC void gc_sweep(void) { } #endif free_tail = 1; + ATB_ANY_TO_FREE(block); DEBUG_printf("gc_sweep(%x)\n", PTR_FROM_BLOCK(block)); #ifdef LOG_HEAP_ACTIVITY @@ -296,7 +296,7 @@ STATIC void gc_sweep(void) { #if MICROPY_PY_GC_COLLECT_RETVAL MP_STATE_MEM(gc_collected)++; #endif - // fall through to free the head + break; case AT_TAIL: if (free_tail) { @@ -338,7 +338,8 @@ void gc_collect_root(void **ptrs, size_t len) { void gc_collect_end(void) { gc_deal_with_stack_overflow(); gc_sweep(); - MP_STATE_MEM(gc_last_free_atb_index) = 0; + MP_STATE_MEM(gc_first_free_atb_index) = 0; + MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1; MP_STATE_MEM(gc_lock_depth)--; GC_EXIT(); } @@ -407,7 +408,9 @@ void gc_info(gc_info_t *info) { GC_EXIT(); } -void *gc_alloc(size_t n_bytes, bool has_finaliser) { +// We place long lived objects at the end of the heap rather than the start. This reduces +// fragmentation by localizing the heap churn to one portion of memory (the start of the heap.) +void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived) { size_t n_blocks = ((n_bytes + BYTES_PER_BLOCK - 1) & (~(BYTES_PER_BLOCK - 1))) / BYTES_PER_BLOCK; DEBUG_printf("gc_alloc(" UINT_FMT " bytes -> " UINT_FMT " blocks)\n", n_bytes, n_blocks); @@ -424,29 +427,62 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser) { return NULL; } - size_t i; + size_t found_block = 0xffffffff; size_t end_block; size_t start_block; - size_t n_free = 0; - int collected = !MP_STATE_MEM(gc_auto_collect_enabled); + size_t n_free; + bool collected = !MP_STATE_MEM(gc_auto_collect_enabled); #if MICROPY_GC_ALLOC_THRESHOLD if (!collected && MP_STATE_MEM(gc_alloc_amount) >= MP_STATE_MEM(gc_alloc_threshold)) { GC_EXIT(); gc_collect(); GC_ENTER(); + collected = true; } #endif - for (;;) { - + bool keep_looking = true; + + // When we start searching on the other side of the crossover block we make sure to + // perform a collect. That way we'll get the closest free block in our section. 
+ size_t crossover_block = BLOCK_FROM_PTR(MP_STATE_MEM(gc_lowest_long_lived_ptr)); + while (keep_looking) { + int8_t direction = 1; + size_t start = MP_STATE_MEM(gc_first_free_atb_index); + if (long_lived) { + direction = -1; + start = MP_STATE_MEM(gc_last_free_atb_index); + } + n_free = 0; // look for a run of n_blocks available blocks - for (i = MP_STATE_MEM(gc_last_free_atb_index); i < MP_STATE_MEM(gc_alloc_table_byte_len); i++) { + for (size_t i = start; keep_looking && MP_STATE_MEM(gc_first_free_atb_index) <= i && i <= MP_STATE_MEM(gc_last_free_atb_index); i += direction) { byte a = MP_STATE_MEM(gc_alloc_table_start)[i]; - if (ATB_0_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 0; goto found; } } else { n_free = 0; } - if (ATB_1_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 1; goto found; } } else { n_free = 0; } - if (ATB_2_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 2; goto found; } } else { n_free = 0; } - if (ATB_3_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 3; goto found; } } else { n_free = 0; } + // Four ATB states are packed into a single byte. + int j = 0; + if (direction == -1) { + j = 3; + } + for (; keep_looking && 0 <= j && j <= 3; j += direction) { + if ((a & (0x3 << (j * 2))) == 0) { + if (++n_free >= n_blocks) { + found_block = i * BLOCKS_PER_ATB + j; + keep_looking = false; + } + } else { + if (!collected) { + size_t block = i * BLOCKS_PER_ATB + j; + if ((direction == 1 && block >= crossover_block) || + (direction == -1 && block < crossover_block)) { + keep_looking = false; + } + } + n_free = 0; + } + } + } + if (n_free >= n_blocks) { + break; } GC_EXIT(); @@ -456,23 +492,31 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser) { } DEBUG_printf("gc_alloc(" UINT_FMT "): no free mem, triggering GC\n", n_bytes); gc_collect(); - collected = 1; + collected = true; + // Try again since we've hopefully freed up space. + keep_looking = true; GC_ENTER(); } + assert(found_block != 0xffffffff); - // found, ending at block i inclusive -found: - // get starting and end blocks, both inclusive - end_block = i; - start_block = i - n_free + 1; - - // Set last free ATB index to block after last block we found, for start of + // Found free space ending at found_block inclusive. + // Also, set last free ATB index to block after last block we found, for start of // next scan. To reduce fragmentation, we only do this if we were looking // for a single free block, which guarantees that there are no free blocks - // before this one. Also, whenever we free or shink a block we must check + // before this one. Also, whenever we free or shrink a block we must check // if this index needs adjusting (see gc_realloc and gc_free). - if (n_free == 1) { - MP_STATE_MEM(gc_last_free_atb_index) = (i + 1) / BLOCKS_PER_ATB; + if (!long_lived) { + end_block = found_block; + start_block = found_block - n_free + 1; + if (n_blocks == 1) { + MP_STATE_MEM(gc_first_free_atb_index) = (found_block + 1) / BLOCKS_PER_ATB; + } + } else { + start_block = found_block; + end_block = found_block + n_free - 1; + if (n_blocks == 1) { + MP_STATE_MEM(gc_last_free_atb_index) = (found_block - 1) / BLOCKS_PER_ATB; + } } #ifdef LOG_HEAP_ACTIVITY @@ -493,6 +537,13 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser) { void *ret_ptr = (void*)(MP_STATE_MEM(gc_pool_start) + start_block * BYTES_PER_BLOCK); DEBUG_printf("gc_alloc(%p)\n", ret_ptr); + // If the allocation was long live then update the lowest value. 
Its used to trigger early + // collects when allocations fail in their respective section. Its also used to ignore calls to + // gc_make_long_lived where the pointer is already in the long lived section. + if (long_lived && ret_ptr < MP_STATE_MEM(gc_lowest_long_lived_ptr)) { + MP_STATE_MEM(gc_lowest_long_lived_ptr) = ret_ptr; + } + #if MICROPY_GC_ALLOC_THRESHOLD MP_STATE_MEM(gc_alloc_amount) += n_blocks; #endif @@ -566,7 +617,10 @@ void gc_free(void *ptr) { #endif // set the last_free pointer to this block if it's earlier in the heap - if (block / BLOCKS_PER_ATB < MP_STATE_MEM(gc_last_free_atb_index)) { + if (block / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) { + MP_STATE_MEM(gc_first_free_atb_index) = block / BLOCKS_PER_ATB; + } + if (block / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) { MP_STATE_MEM(gc_last_free_atb_index) = block / BLOCKS_PER_ATB; } @@ -607,6 +661,50 @@ size_t gc_nbytes(const void *ptr) { return 0; } +bool gc_has_finaliser(const void *ptr) { +#if MICROPY_ENABLE_FINALISER + GC_ENTER(); + if (VERIFY_PTR(ptr)) { + bool has_finaliser = FTB_GET(BLOCK_FROM_PTR(ptr)); + GC_EXIT(); + return has_finaliser; + } + + // invalid pointer + GC_EXIT(); +#else + (void) ptr; +#endif + return false; +} + +void *gc_make_long_lived(void *old_ptr) { + // If its already in the long lived section then don't bother moving it. + if (old_ptr >= MP_STATE_MEM(gc_lowest_long_lived_ptr)) { + return old_ptr; + } + size_t n_bytes = gc_nbytes(old_ptr); + if (n_bytes == 0) { + return old_ptr; + } + bool has_finaliser = gc_has_finaliser(old_ptr); + + // Try and find a new area in the long lived section to copy the memory to. + void* new_ptr = gc_alloc(n_bytes, has_finaliser, true); + if (new_ptr == NULL) { + return old_ptr; + } else if (old_ptr > new_ptr) { + // Return the old pointer if the new one is lower in the heap and free the new space. + gc_free(new_ptr); + return old_ptr; + } + // We copy everything over and let the garbage collection process delete the old copy. That way + // we ensure we don't delete memory that has a second reference. (Though if there is we may + // confuse things when its mutable.) 
+ memcpy(new_ptr, old_ptr, n_bytes); + return new_ptr; +} + #if 0 // old, simple realloc that didn't expand memory in place void *gc_realloc(void *ptr, mp_uint_t n_bytes) { @@ -639,7 +737,7 @@ void *gc_realloc(void *ptr, mp_uint_t n_bytes) { void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { // check for pure allocation if (ptr_in == NULL) { - return gc_alloc(n_bytes, false); + return gc_alloc(n_bytes, false, false); } // check for pure free @@ -714,7 +812,10 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { } // set the last_free pointer to end of this block if it's earlier in the heap - if ((block + new_blocks) / BLOCKS_PER_ATB < MP_STATE_MEM(gc_last_free_atb_index)) { + if ((block + new_blocks) / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) { + MP_STATE_MEM(gc_first_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB; + } + if ((block + new_blocks) / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) { MP_STATE_MEM(gc_last_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB; } @@ -774,7 +875,7 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { } // can't resize inplace; try to find a new contiguous chain - void *ptr_out = gc_alloc(n_bytes, ftb_state); + void *ptr_out = gc_alloc(n_bytes, ftb_state, false); // check that the alloc succeeded if (ptr_out == NULL) { diff --git a/py/gc.h b/py/gc.h index 739349c1f5b28..e5878e1eed117 100644 --- a/py/gc.h +++ b/py/gc.h @@ -45,9 +45,11 @@ void gc_collect_start(void); void gc_collect_root(void **ptrs, size_t len); void gc_collect_end(void); -void *gc_alloc(size_t n_bytes, bool has_finaliser); +void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived); void gc_free(void *ptr); // does not call finaliser size_t gc_nbytes(const void *ptr); +bool gc_has_finaliser(const void *ptr); +void *gc_make_long_lived(void *old_ptr); void *gc_realloc(void *ptr, size_t n_bytes, bool allow_move); typedef struct _gc_info_t { diff --git a/py/gc_long_lived.c b/py/gc_long_lived.c new file mode 100644 index 0000000000000..bd0e63d9be60a --- /dev/null +++ b/py/gc_long_lived.c @@ -0,0 +1,132 @@ +/* + * This file is part of the MicroPython project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2018 Scott Shawcroft for Adafruit Industries LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "py/emitglue.h" +#include "py/gc_long_lived.h" +#include "py/gc.h" + +mp_obj_fun_bc_t *make_fun_bc_long_lived(mp_obj_fun_bc_t *fun_bc, uint8_t max_depth) { + #ifndef MICROPY_ENABLE_GC + return fun_bc; + #endif + if (fun_bc == NULL || fun_bc == mp_const_none || max_depth == 0) { + return fun_bc; + } + fun_bc->bytecode = gc_make_long_lived((byte*) fun_bc->bytecode); + fun_bc->globals = make_dict_long_lived(fun_bc->globals, max_depth - 1); + for (uint32_t i = 0; i < gc_nbytes(fun_bc->const_table) / sizeof(mp_obj_t); i++) { + // Skip things that aren't allocated on the heap (and hence have zero bytes.) + if (gc_nbytes((byte *)fun_bc->const_table[i]) == 0) { + continue; + } + // Try to detect raw code. + mp_raw_code_t* raw_code = MP_OBJ_TO_PTR(fun_bc->const_table[i]); + if (raw_code->kind == MP_CODE_BYTECODE) { + raw_code->data.u_byte.bytecode = gc_make_long_lived((byte*) raw_code->data.u_byte.bytecode); + // TODO(tannewt): Do we actually want to recurse here? + raw_code->data.u_byte.const_table = gc_make_long_lived((byte*) raw_code->data.u_byte.const_table); + } + ((mp_uint_t *) fun_bc->const_table)[i] = (mp_uint_t) make_obj_long_lived( + (mp_obj_t) fun_bc->const_table[i], max_depth - 1); + + } + fun_bc->const_table = gc_make_long_lived((mp_uint_t*) fun_bc->const_table); + // extra_args stores keyword only argument default values. + size_t words = gc_nbytes(fun_bc) / sizeof(mp_uint_t*); + for (size_t i = 0; i < words - 4; i++) { + if (fun_bc->extra_args[i] == NULL) { + continue; + } + if (MP_OBJ_IS_TYPE(fun_bc->extra_args[i], &mp_type_dict)) { + fun_bc->extra_args[i] = make_dict_long_lived(fun_bc->extra_args[i], max_depth - 1); + } else { + fun_bc->extra_args[i] = make_obj_long_lived(fun_bc->extra_args[i], max_depth - 1); + } + + } + return gc_make_long_lived(fun_bc); +} + +mp_obj_property_t *make_property_long_lived(mp_obj_property_t *prop, uint8_t max_depth) { + #ifndef MICROPY_ENABLE_GC + return prop; + #endif + if (max_depth == 0) { + return prop; + } + prop->proxy[0] = make_fun_bc_long_lived((mp_obj_fun_bc_t*) prop->proxy[0], max_depth - 1); + prop->proxy[1] = make_fun_bc_long_lived((mp_obj_fun_bc_t*) prop->proxy[1], max_depth - 1); + prop->proxy[2] = make_fun_bc_long_lived((mp_obj_fun_bc_t*) prop->proxy[2], max_depth - 1); + return gc_make_long_lived(prop); +} + +mp_obj_dict_t *make_dict_long_lived(mp_obj_dict_t *dict, uint8_t max_depth) { + #ifndef MICROPY_ENABLE_GC + return dict; + #endif + if (dict == NULL || max_depth == 0) { + return dict; + } + // Update all of the references first so that we reduce the chance of references to the old + // copies. 
+ dict->map.table = gc_make_long_lived(dict->map.table); + for (size_t i = 0; i < dict->map.alloc; i++) { + if (MP_MAP_SLOT_IS_FILLED(&dict->map, i)) { + mp_obj_t value = dict->map.table[i].value; + dict->map.table[i].value = make_obj_long_lived(value, max_depth - 1); + } + } + return gc_make_long_lived(dict); +} + +mp_obj_str_t *make_str_long_lived(mp_obj_str_t *str) { + str->data = gc_make_long_lived((byte *) str->data); + return gc_make_long_lived(str); +} + +mp_obj_t make_obj_long_lived(mp_obj_t obj, uint8_t max_depth){ + #ifndef MICROPY_ENABLE_GC + return obj; + #endif + if (obj == NULL) { + return obj; + } + if (MP_OBJ_IS_TYPE(obj, &mp_type_fun_bc)) { + mp_obj_fun_bc_t *fun_bc = MP_OBJ_TO_PTR(obj); + return MP_OBJ_FROM_PTR(make_fun_bc_long_lived(fun_bc, max_depth)); + } else if (MP_OBJ_IS_TYPE(obj, &mp_type_property)) { + mp_obj_property_t *prop = MP_OBJ_TO_PTR(obj); + return MP_OBJ_FROM_PTR(make_property_long_lived(prop, max_depth)); + } else if (MP_OBJ_IS_TYPE(obj, &mp_type_str)) { + mp_obj_str_t *str = MP_OBJ_TO_PTR(obj); + return MP_OBJ_FROM_PTR(make_str_long_lived(str)); + } else if (MP_OBJ_IS_TYPE(obj, &mp_type_type)) { + // Types are already long lived during creation. + return obj; + } else { + return gc_make_long_lived(obj); + } +} diff --git a/py/gc_long_lived.h b/py/gc_long_lived.h new file mode 100644 index 0000000000000..229bc73911c8f --- /dev/null +++ b/py/gc_long_lived.h @@ -0,0 +1,43 @@ +/* + * This file is part of the MicroPython project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2018 Scott Shawcroft for Adafruit Industries LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +// These helpers move MicroPython objects and their sub-objects to the long lived portion of the +// heap. 
+ +#ifndef MICROPY_INCLUDED_PY_GC_LONG_LIVED_H +#define MICROPY_INCLUDED_PY_GC_LONG_LIVED_H + +#include "py/objfun.h" +#include "py/objproperty.h" +#include "py/objstr.h" + +mp_obj_fun_bc_t *make_fun_bc_long_lived(mp_obj_fun_bc_t *fun_bc, uint8_t max_depth); +mp_obj_property_t *make_property_long_lived(mp_obj_property_t *prop, uint8_t max_depth); +mp_obj_dict_t *make_dict_long_lived(mp_obj_dict_t *dict, uint8_t max_depth); +mp_obj_str_t *make_str_long_lived(mp_obj_str_t *str); +mp_obj_t make_obj_long_lived(mp_obj_t obj, uint8_t max_depth); + +#endif // MICROPY_INCLUDED_PY_GC_LONG_LIVED_H diff --git a/py/malloc.c b/py/malloc.c index ea1d4c4b9e022..d6983ffdfcef7 100644 --- a/py/malloc.c +++ b/py/malloc.c @@ -53,12 +53,15 @@ #undef malloc #undef free #undef realloc -#define malloc(b) gc_alloc((b), false) -#define malloc_with_finaliser(b) gc_alloc((b), true) +#define malloc_ll(b, ll) gc_alloc((b), false, (ll)) +#define malloc_with_finaliser(b) gc_alloc((b), true, false) #define free gc_free #define realloc(ptr, n) gc_realloc(ptr, n, true) #define realloc_ext(ptr, n, mv) gc_realloc(ptr, n, mv) #else +#define malloc_ll(b, ll) malloc(b) +#define malloc_with_finaliser(b) malloc((b)) + STATIC void *realloc_ext(void *ptr, size_t n_bytes, bool allow_move) { if (allow_move) { return realloc(ptr, n_bytes); @@ -71,8 +74,8 @@ STATIC void *realloc_ext(void *ptr, size_t n_bytes, bool allow_move) { } #endif // MICROPY_ENABLE_GC -void *m_malloc(size_t num_bytes) { - void *ptr = malloc(num_bytes); +void *m_malloc(size_t num_bytes, bool long_lived) { + void *ptr = malloc_ll(num_bytes, long_lived); if (ptr == NULL && num_bytes != 0) { m_malloc_fail(num_bytes); } @@ -85,8 +88,8 @@ void *m_malloc(size_t num_bytes) { return ptr; } -void *m_malloc_maybe(size_t num_bytes) { - void *ptr = malloc(num_bytes); +void *m_malloc_maybe(size_t num_bytes, bool long_lived) { + void *ptr = malloc_ll(num_bytes, long_lived); #if MICROPY_MEM_STATS MP_STATE_MEM(total_bytes_allocated) += num_bytes; MP_STATE_MEM(current_bytes_allocated) += num_bytes; @@ -112,8 +115,8 @@ void *m_malloc_with_finaliser(size_t num_bytes) { } #endif -void *m_malloc0(size_t num_bytes) { - void *ptr = m_malloc(num_bytes); +void *m_malloc0(size_t num_bytes, bool long_lived) { + void *ptr = m_malloc(num_bytes, long_lived); if (ptr == NULL && num_bytes != 0) { m_malloc_fail(num_bytes); } diff --git a/py/misc.h b/py/misc.h index 995e4549cd6c7..d9a8efe7f85b6 100644 --- a/py/misc.h +++ b/py/misc.h @@ -56,13 +56,18 @@ typedef unsigned int uint; // TODO make a lazy m_renew that can increase by a smaller amount than requested (but by at least 1 more element) -#define m_new(type, num) ((type*)(m_malloc(sizeof(type) * (num)))) -#define m_new_maybe(type, num) ((type*)(m_malloc_maybe(sizeof(type) * (num)))) -#define m_new0(type, num) ((type*)(m_malloc0(sizeof(type) * (num)))) +#define m_new(type, num) ((type*)(m_malloc(sizeof(type) * (num), false))) +#define m_new_ll(type, num) ((type*)(m_malloc(sizeof(type) * (num), true))) +#define m_new_maybe(type, num) ((type*)(m_malloc_maybe(sizeof(type) * (num), false))) +#define m_new_ll_maybe(type, num) ((type*)(m_malloc_maybe(sizeof(type) * (num), true))) +#define m_new0(type, num) ((type*)(m_malloc0(sizeof(type) * (num), false))) +#define m_new0_ll(type, num) ((type*)(m_malloc0(sizeof(type) * (num), true))) #define m_new_obj(type) (m_new(type, 1)) +#define m_new_ll_obj(type) (m_new_ll(type, 1)) #define m_new_obj_maybe(type) (m_new_maybe(type, 1)) -#define m_new_obj_var(obj_type, var_type, var_num) 
((obj_type*)m_malloc(sizeof(obj_type) + sizeof(var_type) * (var_num))) -#define m_new_obj_var_maybe(obj_type, var_type, var_num) ((obj_type*)m_malloc_maybe(sizeof(obj_type) + sizeof(var_type) * (var_num))) +#define m_new_obj_var(obj_type, var_type, var_num) ((obj_type*)m_malloc(sizeof(obj_type) + sizeof(var_type) * (var_num), false)) +#define m_new_obj_var_maybe(obj_type, var_type, var_num) ((obj_type*)m_malloc_maybe(sizeof(obj_type) + sizeof(var_type) * (var_num), false)) +#define m_new_ll_obj_var_maybe(obj_type, var_type, var_num) ((obj_type*)m_malloc_maybe(sizeof(obj_type) + sizeof(var_type) * (var_num), true)) #if MICROPY_ENABLE_FINALISER #define m_new_obj_with_finaliser(type) ((type*)(m_malloc_with_finaliser(sizeof(type)))) #else @@ -81,10 +86,10 @@ typedef unsigned int uint; #endif #define m_del_obj(type, ptr) (m_del(type, ptr, 1)) -void *m_malloc(size_t num_bytes); -void *m_malloc_maybe(size_t num_bytes); +void *m_malloc(size_t num_bytes, bool long_lived); +void *m_malloc_maybe(size_t num_bytes, bool long_lived); void *m_malloc_with_finaliser(size_t num_bytes); -void *m_malloc0(size_t num_bytes); +void *m_malloc0(size_t num_bytes, bool long_lived); #if MICROPY_MALLOC_USES_ALLOCATED_SIZE void *m_realloc(void *ptr, size_t old_num_bytes, size_t new_num_bytes); void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes, bool allow_move); diff --git a/py/modbuiltins.c b/py/modbuiltins.c index 72830929254f6..5e11b8d18cdcf 100644 --- a/py/modbuiltins.c +++ b/py/modbuiltins.c @@ -79,7 +79,7 @@ STATIC mp_obj_t mp_builtin___build_class__(size_t n_args, const mp_obj_t *args) meta_args[2] = class_locals; // dict of members mp_obj_t new_class = mp_call_function_n_kw(meta, 3, 0, meta_args); - // store into cell if neede + // store into cell if needed if (cell != mp_const_none) { mp_obj_cell_set(cell, new_class); } diff --git a/py/mpstate.h b/py/mpstate.h index 23133cb7e4cde..19a5d711edf67 100644 --- a/py/mpstate.h +++ b/py/mpstate.h @@ -76,21 +76,24 @@ typedef struct _mp_state_mem_t { byte *gc_pool_start; byte *gc_pool_end; + void *gc_lowest_long_lived_ptr; + int gc_stack_overflow; size_t gc_stack[MICROPY_ALLOC_GC_STACK_SIZE]; size_t *gc_sp; uint16_t gc_lock_depth; - // This variable controls auto garbage collection. If set to 0 then the + // This variable controls auto garbage collection. If set to false then the // GC won't automatically run when gc_alloc can't find enough blocks. But // you can still allocate/free memory and also explicitly call gc_collect. 
- uint16_t gc_auto_collect_enabled; + bool gc_auto_collect_enabled; #if MICROPY_GC_ALLOC_THRESHOLD size_t gc_alloc_amount; size_t gc_alloc_threshold; #endif + size_t gc_first_free_atb_index; size_t gc_last_free_atb_index; #if MICROPY_PY_GC_COLLECT_RETVAL diff --git a/py/obj.h b/py/obj.h index 3f784dafcc291..e4d950b97ca29 100644 --- a/py/obj.h +++ b/py/obj.h @@ -816,6 +816,7 @@ typedef struct _mp_obj_module_t { mp_obj_dict_t *globals; } mp_obj_module_t; mp_obj_dict_t *mp_obj_module_get_globals(mp_obj_t self_in); +void mp_obj_module_set_globals(mp_obj_t self_in, mp_obj_dict_t *globals); // check if given module object is a package bool mp_obj_is_package(mp_obj_t module); diff --git a/py/objmodule.c b/py/objmodule.c index 52c6520b9d9e9..86e37850efeb3 100644 --- a/py/objmodule.c +++ b/py/objmodule.c @@ -27,6 +27,7 @@ #include #include +#include "py/gc.h" #include "py/objmodule.h" #include "py/runtime.h" #include "py/builtin.h" @@ -84,8 +85,9 @@ STATIC void module_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) { mp_obj_dict_delete(MP_OBJ_FROM_PTR(dict), MP_OBJ_NEW_QSTR(attr)); } else { // store attribute + mp_obj_t long_lived = gc_make_long_lived(dest[1]); // TODO CPython allows STORE_ATTR to a module, but is this the correct implementation? - mp_obj_dict_store(MP_OBJ_FROM_PTR(dict), MP_OBJ_NEW_QSTR(attr), dest[1]); + mp_obj_dict_store(MP_OBJ_FROM_PTR(dict), MP_OBJ_NEW_QSTR(attr), long_lived); } dest[0] = MP_OBJ_NULL; // indicate success } @@ -108,9 +110,9 @@ mp_obj_t mp_obj_new_module(qstr module_name) { } // create new module object - mp_obj_module_t *o = m_new_obj(mp_obj_module_t); + mp_obj_module_t *o = m_new_ll_obj(mp_obj_module_t); o->base.type = &mp_type_module; - o->globals = MP_OBJ_TO_PTR(mp_obj_new_dict(MICROPY_MODULE_DICT_SIZE)); + o->globals = MP_OBJ_TO_PTR(gc_make_long_lived(mp_obj_new_dict(MICROPY_MODULE_DICT_SIZE))); // store __name__ entry in the module mp_obj_dict_store(MP_OBJ_FROM_PTR(o->globals), MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(module_name)); @@ -128,6 +130,12 @@ mp_obj_dict_t *mp_obj_module_get_globals(mp_obj_t self_in) { return self->globals; } +void mp_obj_module_set_globals(mp_obj_t self_in, mp_obj_dict_t *globals) { + assert(MP_OBJ_IS_TYPE(self_in, &mp_type_module)); + mp_obj_module_t *self = MP_OBJ_TO_PTR(self_in); + self->globals = globals; +} + /******************************************************************************/ // Global module table and related functions diff --git a/py/objtype.c b/py/objtype.c index 2fefc65ade616..1d6dc0b6cd4ac 100644 --- a/py/objtype.c +++ b/py/objtype.c @@ -30,6 +30,7 @@ #include #include +#include "py/gc_long_lived.h" #include "py/objtype.h" #include "py/runtime.h" @@ -960,7 +961,7 @@ STATIC void type_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) { mp_map_elem_t *elem = mp_map_lookup(locals_map, MP_OBJ_NEW_QSTR(attr), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND); // note that locals_map may be in ROM, so add will fail in that case if (elem != NULL) { - elem->value = dest[1]; + elem->value = make_obj_long_lived(dest[1], 10); dest[0] = MP_OBJ_NULL; // indicate success } } @@ -1002,7 +1003,7 @@ mp_obj_t mp_obj_new_type(qstr name, mp_obj_t bases_tuple, mp_obj_t locals_dict) } } - mp_obj_type_t *o = m_new0(mp_obj_type_t, 1); + mp_obj_type_t *o = m_new0_ll(mp_obj_type_t, 1); o->base.type = &mp_type_type; o->name = name; o->print = instance_print; @@ -1030,7 +1031,7 @@ mp_obj_t mp_obj_new_type(qstr name, mp_obj_t bases_tuple, mp_obj_t locals_dict) } } - o->locals_dict = MP_OBJ_TO_PTR(locals_dict); + o->locals_dict = 
make_dict_long_lived(locals_dict, 10); const mp_obj_type_t *native_base; size_t num_native_bases = instance_count_native_bases(o, &native_base); diff --git a/py/py.mk b/py/py.mk index 9021192df8ab1..21f81c35d58bb 100644 --- a/py/py.mk +++ b/py/py.mk @@ -110,6 +110,7 @@ PY_O_BASENAME = \ nlrsetjmp.o \ malloc.o \ gc.o \ + gc_long_lived.o \ qstr.o \ vstr.o \ mpprint.o \ diff --git a/py/qstr.c b/py/qstr.c index 95c9b6835efdb..49b2d9daaa83c 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -28,6 +28,7 @@ #include #include +#include "py/gc.h" #include "py/mpstate.h" #include "py/qstr.h" #include "py/gc.h" @@ -143,7 +144,7 @@ STATIC qstr qstr_add(const byte *q_ptr) { // make sure we have room in the pool for a new qstr if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) { - qstr_pool_t *pool = m_new_obj_var_maybe(qstr_pool_t, const char*, MP_STATE_VM(last_pool)->alloc * 2); + qstr_pool_t *pool = m_new_ll_obj_var_maybe(qstr_pool_t, const char*, MP_STATE_VM(last_pool)->alloc * 2); if (pool == NULL) { QSTR_EXIT(); m_malloc_fail(MP_STATE_VM(last_pool)->alloc * 2); @@ -213,10 +214,10 @@ qstr qstr_from_strn(const char *str, size_t len) { if (al < MICROPY_ALLOC_QSTR_CHUNK_INIT) { al = MICROPY_ALLOC_QSTR_CHUNK_INIT; } - MP_STATE_VM(qstr_last_chunk) = m_new_maybe(byte, al); + MP_STATE_VM(qstr_last_chunk) = m_new_ll_maybe(byte, al); if (MP_STATE_VM(qstr_last_chunk) == NULL) { // failed to allocate a large chunk so try with exact size - MP_STATE_VM(qstr_last_chunk) = m_new_maybe(byte, n_bytes); + MP_STATE_VM(qstr_last_chunk) = m_new_ll_maybe(byte, n_bytes); if (MP_STATE_VM(qstr_last_chunk) == NULL) { QSTR_EXIT(); m_malloc_fail(n_bytes); @@ -258,7 +259,7 @@ qstr qstr_build_end(byte *q_ptr) { mp_uint_t hash = qstr_compute_hash(Q_GET_DATA(q_ptr), len); Q_SET_HASH(q_ptr, hash); q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0'; - q = qstr_add(q_ptr); + q = qstr_add(gc_make_long_lived(q_ptr)); } else { m_del(byte, q_ptr, Q_GET_ALLOC(q_ptr)); } From da330f0cab09c9150b588ee0c1cb4e09edaee9fc Mon Sep 17 00:00:00 2001 From: Scott Shawcroft Date: Tue, 23 Jan 2018 16:22:43 -0800 Subject: [PATCH 4/5] Polish up the heap analysis script and make it more CLI friendly. It can now render the heap layout over a sequence of ram dumps. The mpy analysis is also better at parsing mpy files. 
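
As context for the diff below, the reworked script treats the RAM dump file as a sequence of equally sized snapshots whose length is derived from the _srelocate and _estack symbols in the linker map. The following standalone Python sketch shows only that slicing step; the file names and the get_symbol_address helper are placeholders, not part of the patch.

# Sketch of the snapshot slicing used by the updated analyze_heap_dump.py
# (--analyze-snapshots=all). File names and get_symbol_address() are
# illustrative placeholders.
def get_symbol_address(map_lines, name):
    # Crude stand-in for the script's map parsing: look for "0x<addr> <name>".
    for line in map_lines:
        parts = line.split()
        if len(parts) == 2 and parts[0].startswith("0x") and parts[1] == name:
            return int(parts[0], 0)
    raise KeyError(name)

with open("firmware.map") as f:          # placeholder path
    map_lines = [line.strip() for line in f]
with open("ram_dump.bin", "rb") as f:    # placeholder path
    ram_dump = f.read()

ram_start = get_symbol_address(map_lines, "_srelocate")
ram_end = get_symbol_address(map_lines, "_estack")
ram_length = ram_end - ram_start

# Newest snapshot first, matching the script's iteration order.
for snapshot_num in range(len(ram_dump) // ram_length - 1, -1, -1):
    ram = ram_dump[ram_length * snapshot_num:ram_length * (snapshot_num + 1)]
    print("snapshot {}: {} bytes of RAM starting at 0x{:08x}".format(
        snapshot_num, len(ram), ram_start))
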
--- tools/analyze_heap_dump.py | 843 +++++++++++++++++++-------------- tools/analyze_mpy.py | 315 ++++++++++-- tools/gc_activity.md | 2 +- tools/output_gc_until_repl.txt | 5 +- 4 files changed, 767 insertions(+), 398 deletions(-) diff --git a/tools/analyze_heap_dump.py b/tools/analyze_heap_dump.py index 31b66b9dfc92e..d5da884e482ae 100644 --- a/tools/analyze_heap_dump.py +++ b/tools/analyze_heap_dump.py @@ -9,6 +9,10 @@ import pygraphviz as pgv import io import html +import os.path +import string + +import click from analyze_mpy import Prelude @@ -32,176 +36,211 @@ SKIP_SYMBOLS = [".debug_ranges", ".debug_frame", ".debug_loc", ".comment", ".debug_str", ".debug_line", ".debug_abbrev", ".debug_info", "COMMON"] -ownership_graph = pgv.AGraph(directed=True) - -with open(sys.argv[1], "rb") as f: - ram = f.read() - -with open(sys.argv[2], "rb") as f: - rom = f.read() - -symbols = {} # name -> address, size -symbol_lookup = {} # address -> name -manual_symbol_map = {} # autoname -> name - -def add_symbol(name, address=None, size=None): - global symbols - if address: - address = int(address, 0) - if size: - size = int(size, 0) - if name in symbols: - if address and symbols[name][0] and symbols[name][0] != address: - print("Conflicting symbol: {}".format(name)) - return - if not address: - address = symbols[name][0] - if not size: - size = symbols[name][1] - symbols[name] = (address, size) - if address: - if not size: - size = 4 - for offset in range(0, size, 4): - symbol_lookup[address + offset] = "{}+{}".format(name, offset) - -with open(sys.argv[3], "r") as f: - common_symbols = False - name = None - for line in f: - line = line.strip() - parts = line.split() - if line.startswith("Common symbol"): - common_symbols = True - if line == "Discarded input sections": - common_symbols = False - if common_symbols: - if len(parts) == 1: - name = parts[0] - elif len(parts) == 2 and name: - add_symbol(name, size=parts[0]) - name = None - elif len(parts) == 3: - add_symbol(parts[0], size=parts[1]) - name = None - else: - if len(parts) == 2 and parts[0].startswith("0x") and not parts[1].startswith("0x"): - add_symbol(parts[1], parts[0]) - if len(parts) == 4 and parts[0] not in SKIP_SYMBOLS and parts[1].startswith("0x") and parts[2].startswith("0x"): - name, address, size, source = parts - if name.startswith((".text", ".rodata", ".bss")) and name.count(".") > 1: - name = name.split(".")[-1] - add_symbol(name, address, size) - # Linker symbols - if len(parts) >= 4 and parts[0].startswith("0x") and parts[2] == "=" and parts[1] != ".": - add_symbol(parts[1], parts[0]) - -rom_start = symbols["_sfixed"][0] -ram_start = symbols["_srelocate"][0] - -def load(address, size=4): - if size is None: - raise ValueError("You must provide a size") - if address > ram_start: - ram_address = address - ram_start - if (ram_address + size) > len(ram): - raise ValueError("Unable to read 0x{:08x} from ram.".format(address)) - return ram[ram_address:ram_address+size] - elif address < len(rom): - if (address + size) > len(rom): - raise ValueError("Unable to read 0x{:08x} from rom.".format(address)) - return rom[address:address+size] - -def load_pointer(address): - return struct.unpack(" ram_start: - qstr_pools.append(prev_pool) - prev_pool = load_pointer(prev_pool) - -longest_free = 0 -current_free = 0 -current_allocation = 0 -total_free = 0 -for i in range(atb_length): - # Each atb byte is four blocks worth of info - atb = heap[i] - for j in range(4): - block_state = (atb >> (j * 2)) & 0x3 - if block_state != AT_FREE and current_free 
> 0: - print("{} bytes free".format(current_free * BYTES_PER_BLOCK)) - current_free = 0 - if block_state != AT_TAIL and current_allocation > 0: +@click.command() +@click.argument("ram_filename") +@click.argument("bin_filename") +@click.argument("map_filename") +@click.option("--print_block_contents", default=False, + help="Prints the contents of each allocated block") +@click.option("--print_unknown_types", default=False, + help="Prints the micropython base type if we don't understand it.") +@click.option("--print_block_state", default=False, + help="Prints the heap block states (allocated or free)") +@click.option("--print_conflicting_symbols", default=False, + help="Prints conflicting symbols from the map") +@click.option("--print-heap-structure/--no-print-heap-structure", default=False, + help="Print heap structure") +@click.option("--output_directory", default="heapvis", + help="Destination for rendered output") +@click.option("--draw-heap-layout/--no-draw-heap-layout", default=True, + help="Draw the heap layout") +@click.option("--draw-heap-ownership/--no-draw-heap-ownership", default=False, + help="Draw the ownership graph of blocks on the heap") +@click.option("--draw-heap-ownership/--no-draw-heap-ownership", default=False, + help="Draw the ownership graph of blocks on the heap") +@click.option("--analyze-snapshots", default="last", type=click.Choice(['all', 'last'])) +def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_contents, + print_unknown_types, print_block_state, print_conflicting_symbols, + print_heap_structure, output_directory, draw_heap_layout, + draw_heap_ownership, analyze_snapshots): + with open(ram_filename, "rb") as f: + ram_dump = f.read() + + with open(bin_filename, "rb") as f: + rom = f.read() + + symbols = {} # name -> address, size + symbol_lookup = {} # address -> name + manual_symbol_map = {} # autoname -> name + + def add_symbol(name, address=None, size=None): + if "lto_priv" in name: + name = name.split(".")[0] + if address: + address = int(address, 0) + if size: + size = int(size, 0) + if name in symbols: + if address and symbols[name][0] and symbols[name][0] != address: + if print_conflicting_symbols: + print("Conflicting symbol: {} at addresses 0x{:08x} and 0x{:08x}".format(name, address, symbols[name][0])) + return + if not address: + address = symbols[name][0] + if not size: + size = symbols[name][1] + symbols[name] = (address, size) + if address: + if not size: + size = 4 + for offset in range(0, size, 4): + symbol_lookup[address + offset] = "{}+{}".format(name, offset) + + with open(map_filename, "r") as f: + common_symbols = False + name = None + for line in f: + line = line.strip() + parts = line.split() + if line.startswith("Common symbol"): + common_symbols = True + if line == "Discarded input sections": + common_symbols = False + if common_symbols: + if len(parts) == 1: + name = parts[0] + elif len(parts) == 2 and name: + add_symbol(name, size=parts[0]) + name = None + elif len(parts) == 3: + add_symbol(parts[0], size=parts[1]) + name = None + else: + if len(parts) == 1 and parts[0].startswith((".text", ".rodata", ".bss")) and parts[0].count(".") > 1 and not parts[0].isnumeric() and ".str" not in parts[0]: + name = parts[0].split(".")[2] + if len(parts) == 3 and parts[0].startswith("0x") and parts[1].startswith("0x") and name: + add_symbol(name, parts[0], parts[1]) + name = None + if len(parts) == 2 and parts[0].startswith("0x") and not parts[1].startswith("0x"): + add_symbol(parts[1], parts[0]) + if len(parts) == 4 and 
parts[0] not in SKIP_SYMBOLS and parts[1].startswith("0x") and parts[2].startswith("0x"): + name, address, size, source = parts + if name.startswith((".text", ".rodata", ".bss")) and name.count(".") > 1: + name = name.split(".")[-1] + add_symbol(name, address, size) + name = None + # Linker symbols + if len(parts) >= 4 and parts[0].startswith("0x") and parts[2] == "=" and parts[1] != ".": + add_symbol(parts[1], parts[0]) + + rom_start = symbols["_sfixed"][0] + ram_start = symbols["_srelocate"][0] + ram_end = symbols["_estack"][0] + ram_length = ram_end - ram_start + if analyze_snapshots == "all": + snapshots = range(len(ram_dump) // ram_length - 1, -1, -1) + elif analyze_snapshots == "last": + snapshots = range(len(ram_dump) // ram_length - 1, len(ram_dump) // ram_length - 2, -1) + for snapshot_num in snapshots: + ram = ram_dump[ram_length*snapshot_num:ram_length*(snapshot_num + 1)] + + ownership_graph = pgv.AGraph(directed=True) + def load(address, size=4): + if size is None: + raise ValueError("You must provide a size") + if address > ram_start: + ram_address = address - ram_start + if (ram_address + size) > len(ram): + raise ValueError("Unable to read 0x{:08x} from ram.".format(address)) + return ram[ram_address:ram_address+size] + elif address < len(rom): + if (address + size) > len(rom): + raise ValueError("Unable to read 0x{:08x} from rom.".format(address)) + return rom[address:address+size] + + def load_pointer(address): + return struct.unpack(" ram_start: + qstr_pools.append(prev_pool) + prev_pool = load_pointer(prev_pool) + + def save_allocated_block(end, current_allocation): allocation_length = current_allocation * BYTES_PER_BLOCK - end = (i * BLOCKS_PER_ATB + j) * BYTES_PER_BLOCK start = end - allocation_length address = pool_start + start data = pool[start:end] - print("0x{:x} {} bytes allocated".format(address, allocation_length)) + if print_block_state: + print("0x{:x} {} bytes allocated".format(address, allocation_length)) + if print_block_contents: + print(data) rows = "" for k in range(current_allocation - 1): @@ -214,6 +253,7 @@ def load_pointer(address): ownership_graph.add_node(address, label=table, style="invisible", shape="plaintext") potential_type = None node = ownership_graph.get_node(address) + node.attr["height"] = 0.25 * current_allocation block_data[address] = data for k in range(len(data) // 4): word = struct.unpack_from("" if potential_type == str_type and k == 3: @@ -262,180 +301,294 @@ def load_pointer(address): if k == 2 and 0x20000000 < word < 0x20040000: bytecode_blocks.append(word) - current_allocation = 0 - if block_state == AT_FREE: - current_free += 1 - total_free += 1 - elif block_state == AT_HEAD: - current_allocation = 1 - elif block_state == AT_TAIL: - current_allocation += 1 - longest_free = max(longest_free, current_free) -if current_free > 0: - print("{} bytes free".format(current_free * BYTES_PER_BLOCK)) - -def is_qstr(obj): - return obj & 0xff800007 == 0x00000006 - -def find_qstr(qstr_index): - pool_ptr = last_pool - if not is_qstr(qstr_index): - return "object" - qstr_index >>= 3 - while pool_ptr != 0: - #print(hex(pool_ptr)) - if pool_ptr in block_data: - pool = block_data[pool_ptr] - prev, total_prev_len, alloc, length = struct.unpack_from("= total_prev_len: - offset = (qstr_index - total_prev_len) * 4 + 16 - start = struct.unpack_from(" len(rom): - return "more than rom: {:x}".format(start + rom_start) - qstr_hash, qstr_len = struct.unpack("> (j * 2)) & 0x3 + if block_state != AT_FREE and current_free > 0: + if print_block_state: + 
print("{} bytes free".format(current_free * BYTES_PER_BLOCK)) + current_free = 0 + if block_state != AT_TAIL and current_allocation > 0: + save_allocated_block((i * BLOCKS_PER_ATB + j) * BYTES_PER_BLOCK, current_allocation) + current_allocation = 0 + if block_state == AT_FREE: + current_free += 1 + total_free += 1 + elif block_state == AT_HEAD or block_state == AT_MARK: + current_allocation = 1 + elif block_state == AT_TAIL and current_allocation > 0: + # In gc_free the logging happens before the tail is freed. So checking + # current_allocation > 0 ensures we only extend an allocation thats started. + current_allocation += 1 + longest_free = max(longest_free, current_free) + #if current_free > 0: + # print("{} bytes free".format(current_free * BYTES_PER_BLOCK)) + if current_allocation > 0: + save_allocated_block(pool_length, current_allocation) + + def is_qstr(obj): + return obj & 0xff800007 == 0x00000006 + + def find_qstr(qstr_index): + pool_ptr = last_pool + if not is_qstr(qstr_index): + return "object" + qstr_index >>= 3 + while pool_ptr != 0: + if pool_ptr > ram_start: + if pool_ptr in block_data: + pool = block_data[pool_ptr] + prev, total_prev_len, alloc, length = struct.unpack_from("= total_prev_len: + offset = (qstr_index - total_prev_len) * 4 + 16 + start = struct.unpack_from(" len(rom): + return "more than rom: {:x}".format(start + rom_start) + qstr_hash, qstr_len = struct.unpack(" heap_start + len(heap): + return "out of range: {:x}".format(start) + local = start - heap_start + qstr_hash, qstr_len = struct.unpack("> 1 + if is_qstr(obj): + return find_qstr(obj) + else: + return "0x{:08x}".format(obj) + + for block in sorted(map_element_blocks): + if block == 0: + continue + try: + node = ownership_graph.get_node(block) + except KeyError: + print("Unable to find memory block for 0x{:08x}. Is there something running?".format(block)) + continue + if block not in block_data: + continue + data = block_data[block] + cells = [] + for i in range(len(data) // 8): + key, value = struct.unpack_from("{}{}".format( + cells[2*i][0], + cells[2*i][1], + cells[2*i+1][0], + cells[2*i+1][1]) + node.attr["shape"] = "plaintext" + node.attr["style"] = "invisible" + node.attr["label"] = "<{}
0x{:08x}
>".format(block, rows) + + for node, degree in ownership_graph.in_degree_iter(): + if degree == 0: + address_bytes = struct.pack(" heap_start + len(heap): - return "out of range: {:x}".format(start) - local = start - heap_start - qstr_hash, qstr_len = struct.unpack("> 1 - if is_qstr(obj): - return find_qstr(obj) - else: - return "0x{:08x}".format(obj) - -for block in sorted(map_element_blocks): - try: - node = ownership_graph.get_node(block) - except KeyError: - print("Unable to find memory block for 0x{:08x}. Is there something running?".format(block)) - continue - #node.attr["fillcolor"] = "gold" - data = block_data[block] - #print("0x{:08x}".format(block)) - cells = [] - for i in range(len(data) // 8): - key, value = struct.unpack_from("") - cells.append(("", " ")) - else: - #print(" {}, {}".format(format(key), format(value))) - cells.append((key, format(key))) - if value in block_data: - edge = ownership_graph.get_edge(block, value) - edge.attr["tailport"] = str(key) - rows = "" - for i in range(len(cells) // 2): - rows += "{}{}".format( - cells[2*i][0], - cells[2*i][1], - cells[2*i+1][0], - cells[2*i+1][1]) - node.attr["shape"] = "plaintext" - node.attr["style"] = "invisible" - node.attr["label"] = "<{}
0x{:08x}
>".format(block, rows) - -for node, degree in ownership_graph.in_degree_iter(): - if degree == 0: - address_bytes = struct.pack("= 16: - rows += "" - code_info_size -= 16 - remaining_bytecode -= 16 - if code_info_size > 0: - rows += ("" - "" - ).format(code_info_size, code_info_size * (80 / 16), (16 - code_info_size), (80 / 16) * (16 - code_info_size)) - remaining_bytecode -= 16 - for i in range(remaining_bytecode // 16): - rows += "" - node.attr["label"] = "<{}
0x{:08x}
>".format(block, rows) - -for block in qstr_chunks: - if block not in block_data: - ownership_graph.delete_node(block) - continue - data = block_data[block] - string = "" - offset = 0 - while offset < len(data) - 1: - qstr_hash, qstr_len = struct.unpack_from("0x{:08x}{}>".format(block, 18 * (len(wrapped) - 1), "
".join(wrapped)) - node.attr["fontname"] = "FiraCode-Medium" - node.attr["fontpath"] = "/Users/tannewt/Library/Fonts/" - node.attr["fontsize"] = 8 - -print("Total free space:", BYTES_PER_BLOCK * total_free) -print("Longest free space:", BYTES_PER_BLOCK * longest_free) - -with open("heap.dot", "w") as f: - f.write(ownership_graph.string()) - -ownership_graph.layout(prog="dot") -ownership_graph.draw("heap.png") + print("Unable to find memory block for string at 0x{:08x}.".format(block)) + continue + try: + raw_string = block_data[block].decode('utf-8') + except: + raw_string = str(block_data[block]) + wrapped = [] + for i in range(0, len(raw_string), 16): + wrapped.append(raw_string[i:i+16]) + node.attr["label"] = "\n".join(wrapped) + node.attr["style"] = "filled" + node.attr["fontname"] = "FiraCode-Medium" + node.attr["fontpath"] = "/Users/tannewt/Library/Fonts/" + node.attr["fontsize"] = 8 + node.attr["height"] = len(wrapped) * 0.25 + + for block in bytecode_blocks: + node = ownership_graph.get_node(block) + node.attr["fillcolor"] = "lightseagreen" + if block in block_data: + data = block_data[block] + else: + print("Unable to find memory block for bytecode at 0x{:08x}.".format(block)) + continue + prelude = Prelude(io.BufferedReader(io.BytesIO(data))) + node.attr["shape"] = "plaintext" + node.attr["style"] = "invisible" + code_info_size = prelude.code_info_size + rows = "" + remaining_bytecode = len(data) - 16 + while code_info_size >= 16: + rows += "" + code_info_size -= 16 + remaining_bytecode -= 16 + if code_info_size > 0: + rows += ("" + "" + ).format(code_info_size, code_info_size * (80 / 16), (16 - code_info_size), (80 / 16) * (16 - code_info_size)) + remaining_bytecode -= 16 + for i in range(remaining_bytecode // 16): + rows += "" + node.attr["label"] = "<{}
0x{:08x}
>".format(block, rows) + + for block in qstr_chunks: + if block not in block_data: + ownership_graph.delete_node(block) + continue + data = block_data[block] + qstrs_in_chunk = "" + offset = 0 + while offset < len(data) - 1: + qstr_hash, qstr_len = struct.unpack_from("0x{:08x}{}>".format(block, 18 * (len(wrapped) - 1), "
".join(wrapped)) + node.attr["fontname"] = "FiraCode-Medium" + node.attr["fontpath"] = "/Users/tannewt/Library/Fonts/" + node.attr["fontsize"] = 8 + + print("Total free space:", BYTES_PER_BLOCK * total_free) + print("Longest free space:", BYTES_PER_BLOCK * longest_free) + + # First render the graph of objects on the heap. + if draw_heap_ownership: + ownership_graph.layout(prog="dot") + fn = os.path.join(output_directory, "heap_ownership{:04d}.png".format(snapshot_num)) + print(fn) + ownership_graph.draw(fn) + + # Second, render the heap layout in memory order. + for node in ownership_graph: + try: + address = int(node.name) + except ValueError: + ownership_graph.remove_node(node) + continue + block = (address - pool_start) // 16 + x = block // 64 + y = 64 - block % 64 + try: + height = float(node.attr["height"]) + except: + height = 0.25 + #print(hex(address), "height", height, y) + #if address in block_data: + # print(hex(address), block, len(block_data[address]), x, y, height) + node.attr["pos"] = "{},{}".format(x * 80, (y - (height - 0.25) * 2) * 18) # in inches + + # Clear edge positioning from ownership graph layout. + if draw_heap_ownership: + for edge in ownership_graph.iteredges(): + del edge.attr["pos"] + + # Reformat block nodes so they are the correct size and do not have keys in them. + for block in sorted(map_element_blocks): + try: + node = ownership_graph.get_node(block) + except KeyError: + if block != 0: + print("Unable to find memory block for 0x{:08x}. Is there something running?".format(block)) + continue + #node.attr["fillcolor"] = "gold" + if block not in block_data: + continue + data = block_data[block] + #print("0x{:08x}".format(block)) + cells = [] + for i in range(len(data) // 8): + key, value = struct.unpack_from("") + cells.append(("", " ")) + else: + #print(" {}, {}".format(format(key), format(value))) + cells.append((key, "")) + if value in block_data: + edge = ownership_graph.get_edge(block, value) + edge.attr["tailport"] = str(key) + rows = "" + for i in range(len(cells) // 2): + rows += "{}{}".format( + cells[2*i][0], + cells[2*i][1], + cells[2*i+1][0], + cells[2*i+1][1]) + node.attr["label"] = "<{}
>".format(rows) + + + ownership_graph.add_node("center", pos="{},{}".format(total_width // 2 - 40, total_height // 2), shape="plaintext", label=" ") + ownership_graph.graph_attr["viewport"] = "{},{},1,{}".format(total_width, total_height, "center") + + ownership_graph.has_layout = True + + if draw_heap_layout: + fn = os.path.join(output_directory, "heap_layout{:04d}.png".format(snapshot_num)) + print(fn) + ownership_graph.draw(fn) + +if __name__ == "__main__": + do_all_the_things() diff --git a/tools/analyze_mpy.py b/tools/analyze_mpy.py index 00be66106524b..376207a24e04f 100644 --- a/tools/analyze_mpy.py +++ b/tools/analyze_mpy.py @@ -13,46 +13,79 @@ } bytecodes = { + 0x00: {"name": "MP_BC_LOAD_FAST_MULTI", + "format": "MP_OPCODE_BYTE"}, 0x10: {"name": "MP_BC_LOAD_CONST_FALSE", "format": "MP_OPCODE_BYTE"}, -#define MP_BC_LOAD_CONST_NONE (0x11) -#define MP_BC_LOAD_CONST_TRUE (0x12) -#define MP_BC_LOAD_CONST_SMALL_INT (0x14) // signed var-int -#define MP_BC_LOAD_CONST_STRING (0x16) // qstr + 0x11: {"name": "MP_BC_LOAD_CONST_NONE", + "format": "MP_OPCODE_BYTE"}, + 0x12: {"name": "MP_BC_LOAD_CONST_TRUE", + "format": "MP_OPCODE_BYTE"}, + 0x14: {"name": "MP_BC_LOAD_CONST_SMALL_INT", + "format": "MP_OPCODE_VAR_UINT"}, + 0x16: {"name": "MP_BC_LOAD_CONST_STRING", + "format": "MP_OPCODE_QSTR"}, + 0x17: {"name": "MP_BC_LOAD_CONST_OBJ", + "format": "MP_OPCODE_VAR_UINT"}, #define MP_BC_LOAD_CONST_OBJ (0x17) // ptr -#define MP_BC_LOAD_NULL (0x18) + 0x18: {"name": "MP_BC_LOAD_NULL", + "format": "MP_OPCODE_BYTE"}, #define MP_BC_LOAD_FAST_N (0x19) // uint -#define MP_BC_LOAD_DEREF (0x1a) // uint -#define MP_BC_LOAD_NAME (0x1b) // qstr -#define MP_BC_LOAD_GLOBAL (0x1c) // qstr -#define MP_BC_LOAD_ATTR (0x1d) // qstr -#define MP_BC_LOAD_METHOD (0x1e) // qstr -#define MP_BC_LOAD_SUPER_METHOD (0x1f) // qstr + 0x1a: {"name": "MP_BC_LOAD_DEREF", + "format": "MP_OPCODE_VAR_UINT"}, + 0x1b: {"name": "MP_BC_LOAD_NAME", + "format": "MP_OPCODE_QSTR"}, + 0x1c: {"name": "MP_BC_LOAD_GLOBAL", + "format": "MP_OPCODE_QSTR"}, + 0x1d: {"name": "MP_BC_LOAD_ATTR", + "format": "MP_OPCODE_QSTR"}, + 0x1e: {"name": "MP_BC_LOAD_METHOD", + "format": "MP_OPCODE_QSTR"}, + 0x1f: {"name": "MP_BC_LOAD_SUPER_METHOD", + "format": "MP_OPCODE_QSTR"}, + 0x20: {"name": "MP_BC_LOAD_BUILD_CLASS", + "format": "MP_OPCODE_BYTE"}, #define MP_BC_LOAD_BUILD_CLASS (0x20) #define MP_BC_LOAD_SUBSCR (0x21) + 0x21: {"name": "MP_BC_LOAD_SUBSCR", + "format": "MP_OPCODE_BYTE"}, #define MP_BC_STORE_FAST_N (0x22) // uint #define MP_BC_STORE_DEREF (0x23) // uint #define MP_BC_STORE_NAME (0x24) // qstr -#define MP_BC_STORE_GLOBAL (0x25) // qstr -#define MP_BC_STORE_ATTR (0x26) // qstr -#define MP_BC_STORE_SUBSCR (0x27) + 0x24: {"name": "MP_BC_STORE_NAME", + "format": "MP_OPCODE_QSTR"}, + 0x25: {"name": "MP_BC_STORE_GLOBAL", + "format": "MP_OPCODE_QSTR"}, + 0x26: {"name": "MP_BC_STORE_ATTR", + "format": "MP_OPCODE_QSTR"}, + 0x27: {"name": "MP_BC_LOAD_SUBSCR", + "format": "MP_OPCODE_BYTE"}, + 0x28: {"name": "MP_BC_DELETE_FAST", + "format": "MP_OPCODE_VAR_UINT"}, #define MP_BC_DELETE_FAST (0x28) // uint #define MP_BC_DELETE_DEREF (0x29) // uint #define MP_BC_DELETE_NAME (0x2a) // qstr #define MP_BC_DELETE_GLOBAL (0x2b) // qstr -#define MP_BC_DUP_TOP (0x30) + 0x30: {"name": "MP_BC_DUP_TOP", + "format": "MP_OPCODE_BYTE"}, #define MP_BC_DUP_TOP_TWO (0x31) -#define MP_BC_POP_TOP (0x32) -#define MP_BC_ROT_TWO (0x33) -#define MP_BC_ROT_THREE (0x34) + 0x32: {"name": "MP_BC_POP_TOP", + "format": "MP_OPCODE_BYTE"}, + 0x33: {"name": "MP_BC_ROT_TWO", + "format": 
"MP_OPCODE_BYTE"}, + 0x34: {"name": "MP_BC_ROT_THREE", + "format": "MP_OPCODE_BYTE"}, -#define MP_BC_JUMP (0x35) // rel byte code offset, 16-bit signed, in excess -#define MP_BC_POP_JUMP_IF_TRUE (0x36) // rel byte code offset, 16-bit signed, in excess -#define MP_BC_POP_JUMP_IF_FALSE (0x37) // rel byte code offset, 16-bit signed, in excess + 0x35: {"name": "MP_BC_JUMP", + "format": "MP_OPCODE_OFFSET"}, + 0x36: {"name": "MP_BC_POP_JUMP_IF_TRUE", + "format": "MP_OPCODE_OFFSET"}, + 0x37: {"name": "MP_BC_POP_JUMP_IF_FALSE", + "format": "MP_OPCODE_OFFSET"}, #define MP_BC_JUMP_IF_TRUE_OR_POP (0x38) // rel byte code offset, 16-bit signed, in excess #define MP_BC_JUMP_IF_FALSE_OR_POP (0x39) // rel byte code offset, 16-bit signed, in excess #define MP_BC_SETUP_WITH (0x3d) // rel byte code offset, 16-bit unsigned @@ -62,44 +95,172 @@ #define MP_BC_END_FINALLY (0x41) #define MP_BC_GET_ITER (0x42) #define MP_BC_FOR_ITER (0x43) // rel byte code offset, 16-bit unsigned -#define MP_BC_POP_BLOCK (0x44) + 0x43: {"name": "MP_BC_FOR_ITER", + "format": "MP_OPCODE_OFFSET"}, + + 0x44: {"name": "MP_BC_POP_BLOCK", + "format": "MP_OPCODE_BYTE"}, #define MP_BC_POP_EXCEPT (0x45) #define MP_BC_UNWIND_JUMP (0x46) // rel byte code offset, 16-bit signed, in excess; then a byte -#define MP_BC_GET_ITER_STACK (0x47) + 0x47: {"name": "MP_BC_GET_ITER_STACK", + "format": "MP_OPCODE_BYTE"}, -#define MP_BC_BUILD_TUPLE (0x50) // uint -#define MP_BC_BUILD_LIST (0x51) // uint -#define MP_BC_BUILD_MAP (0x53) // uint -#define MP_BC_STORE_MAP (0x54) + + 0x50: {"name": "MP_BC_BUILD_TUPLE", + "format": "MP_OPCODE_VAR_UINT"}, + 0x51: {"name": "MP_BC_BUILD_LIST", + "format": "MP_OPCODE_VAR_UINT"}, + 0x53: {"name": "MP_BC_BUILD_MAP", + "format": "MP_OPCODE_VAR_UINT"}, + 0x54: {"name": "MP_BC_STORE_MAP", + "format": "MP_OPCODE_BYTE"}, #define MP_BC_BUILD_SET (0x56) // uint #define MP_BC_BUILD_SLICE (0x58) // uint #define MP_BC_STORE_COMP (0x57) // uint + 0x57: {"name": "MP_BC_STORE_COMP", + "format": "MP_OPCODE_VAR_UINT"}, #define MP_BC_UNPACK_SEQUENCE (0x59) // uint #define MP_BC_UNPACK_EX (0x5a) // uint -#define MP_BC_RETURN_VALUE (0x5b) -#define MP_BC_RAISE_VARARGS (0x5c) // byte + 0x5b: {"name": "MP_BC_RETURN_VALUE", + "format": "MP_OPCODE_BYTE"}, + 0x5c: {"name": "MP_BC_RAISE_VARARGS", + "format": "MP_OPCODE_BYTE_EXTRA"}, #define MP_BC_YIELD_VALUE (0x5d) #define MP_BC_YIELD_FROM (0x5e) #define MP_BC_MAKE_FUNCTION (0x60) // uint -#define MP_BC_MAKE_FUNCTION_DEFARGS (0x61) // uint -#define MP_BC_MAKE_CLOSURE (0x62) // uint -#define MP_BC_MAKE_CLOSURE_DEFARGS (0x63) // uint -#define MP_BC_CALL_FUNCTION (0x64) // uint -#define MP_BC_CALL_FUNCTION_VAR_KW (0x65) // uint -#define MP_BC_CALL_METHOD (0x66) // uint -#define MP_BC_CALL_METHOD_VAR_KW (0x67) // uint - -#define MP_BC_IMPORT_NAME (0x68) // qstr + 0x60: {"name": "MP_BC_MAKE_FUNCTION", + "format": "MP_OPCODE_VAR_UINT"}, + 0x61: {"name": "MP_BC_MAKE_FUNCTION_DEFARGS", + "format": "MP_OPCODE_VAR_UINT"}, + 0x62: {"name": "MP_BC_MAKE_CLOSURE", + "format": "MP_OPCODE_VAR_UINT_EXTRA"}, + 0x63: {"name": "MP_BC_MAKE_CLOSURE", + "format": "MP_OPCODE_VAR_UINT_EXTRA"}, + 0x64: {"name": "MP_BC_CALL_FUNCTION", + "format": "MP_OPCODE_VAR_UINT"}, + 0x65: {"name": "MP_BC_CALL_FUNCTION_VAR_KW", + "format": "MP_OPCODE_VAR_UINT"}, + 0x66: {"name": "MP_BC_CALL_METHOD", + "format": "MP_OPCODE_VAR_UINT"}, + 0x67: {"name": "MP_BC_CALL_METHOD_VAR_KW", + "format": "MP_OPCODE_VAR_UINT"}, + + 0x68: {"name": "MP_BC_IMPORT_NAME", + "format": "MP_OPCODE_QSTR"}, + 0x69: {"name": "MP_BC_IMPORT_FROM", + "format": 
"MP_OPCODE_QSTR"}, #define MP_BC_IMPORT_FROM (0x69) // qstr #define MP_BC_IMPORT_STAR (0x6a) #define MP_BC_LOAD_CONST_SMALL_INT_MULTI (0x70) // + N(64) + 0x7f: {"name": "MP_BC_LOAD_CONST_SMALL_INT_MULTI -1", + "format": "MP_OPCODE_BYTE"}, + 0x80: {"name": "MP_BC_LOAD_CONST_SMALL_INT_MULTI 0", + "format": "MP_OPCODE_BYTE"}, + 0x81: {"name": "MP_BC_LOAD_CONST_SMALL_INT_MULTI 1", + "format": "MP_OPCODE_BYTE"}, + 0x82: {"name": "MP_BC_LOAD_CONST_SMALL_INT_MULTI 2", + "format": "MP_OPCODE_BYTE"}, + 0x83: {"name": "MP_BC_LOAD_CONST_SMALL_INT_MULTI 3", + "format": "MP_OPCODE_BYTE"}, + 0x84: {"name": "MP_BC_LOAD_CONST_SMALL_INT_MULTI 4", + "format": "MP_OPCODE_BYTE"}, #define MP_BC_LOAD_FAST_MULTI (0xb0) // + N(16) + 0xb0: {"name": "MP_BC_LOAD_FAST_MULTI 0", + "format": "MP_OPCODE_BYTE"}, + 0xb1: {"name": "MP_BC_LOAD_FAST_MULTI 1", + "format": "MP_OPCODE_BYTE"}, + 0xb2: {"name": "MP_BC_LOAD_FAST_MULTI 2", + "format": "MP_OPCODE_BYTE"}, + 0xb3: {"name": "MP_BC_LOAD_FAST_MULTI 3", + "format": "MP_OPCODE_BYTE"}, + 0xb4: {"name": "MP_BC_LOAD_FAST_MULTI 4", + "format": "MP_OPCODE_BYTE"}, + 0xb5: {"name": "MP_BC_LOAD_FAST_MULTI 5", + "format": "MP_OPCODE_BYTE"}, + 0xb6: {"name": "MP_BC_LOAD_FAST_MULTI 6", + "format": "MP_OPCODE_BYTE"}, + 0xb7: {"name": "MP_BC_LOAD_FAST_MULTI 7", + "format": "MP_OPCODE_BYTE"}, + 0xb8: {"name": "MP_BC_LOAD_FAST_MULTI 8", + "format": "MP_OPCODE_BYTE"}, #define MP_BC_STORE_FAST_MULTI (0xc0) // + N(16) + 0xc0: {"name": "MP_BC_STORE_FAST_MULTI 0", + "format": "MP_OPCODE_BYTE"}, + 0xc1: {"name": "MP_BC_STORE_FAST_MULTI 1", + "format": "MP_OPCODE_BYTE"}, + 0xc2: {"name": "MP_BC_STORE_FAST_MULTI 2", + "format": "MP_OPCODE_BYTE"}, + 0xc3: {"name": "MP_BC_STORE_FAST_MULTI 3", + "format": "MP_OPCODE_BYTE"}, + 0xc4: {"name": "MP_BC_STORE_FAST_MULTI 4", + "format": "MP_OPCODE_BYTE"}, + 0xc5: {"name": "MP_BC_STORE_FAST_MULTI 5", + "format": "MP_OPCODE_BYTE"}, + 0xc6: {"name": "MP_BC_STORE_FAST_MULTI 6", + "format": "MP_OPCODE_BYTE"}, + 0xc7: {"name": "MP_BC_STORE_FAST_MULTI 7", + "format": "MP_OPCODE_BYTE"}, #define MP_BC_UNARY_OP_MULTI (0xd0) // + op(> 8 - opcode += opcode_size + if not opcode_size: + i += 2 + while (bytecode[i] & 0x80) != 0: + i += 1 + if bc["format"] == "MP_OPCODE_VAR_UINT_EXTRA": + i += 1 + else: + i += opcode_size class mpyFile: def __init__(self, encoded_mpy): diff --git a/tools/gc_activity.md b/tools/gc_activity.md index 40252bdf7308e..d1a4c28031933 100644 --- a/tools/gc_activity.md +++ b/tools/gc_activity.md @@ -13,7 +13,7 @@ correct port. GDB is usually :3333 and JLink is :2331. Now, run gdb from your port directory: ``` -arm-none-eabi-gdb -x ../tools/output_gc_until_repl.txt build-metro_m0_flash/firmware.elf +arm-none-eabi-gdb -x ../tools/output_gc_until_repl.txt build-metro_m0_express/firmware.elf ``` This will take a little time while it breaks, backtraces and continues for every diff --git a/tools/output_gc_until_repl.txt b/tools/output_gc_until_repl.txt index 28fb2921982aa..0e99626353e5a 100644 --- a/tools/output_gc_until_repl.txt +++ b/tools/output_gc_until_repl.txt @@ -10,15 +10,16 @@ set logging on set remote hardware-breakpoint-limit 4 # gc log -break gc.c:110 +break gc.c:103 commands backtrace p/x start_block p/x length +append binary memory ram.bin &_srelocate &_estack continue end -break mp_hal_stdin_rx_chr +break main.c:164 continue From aa0ce98b3e7228a469fa11709fbd6864242079b4 Mon Sep 17 00:00:00 2001 From: Scott Shawcroft Date: Wed, 24 Jan 2018 14:13:26 -0800 Subject: [PATCH 5/5] Fix the initial state and polish a couple comments. 
--- py/gc.c | 2 +- py/gc_long_lived.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/py/gc.c b/py/gc.c index ffdcbe5c02566..2c2354e4c0705 100644 --- a/py/gc.c +++ b/py/gc.c @@ -147,7 +147,7 @@ void gc_init(void *start, void *end) { #endif // Set first free ATB index to the start of the heap. - MP_STATE_MEM(gc_last_free_atb_index) = 0; + MP_STATE_MEM(gc_first_free_atb_index) = 0; // Set last free ATB index to the end of the heap. MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1; // Set the lowest long lived ptr to the end of the heap to start. This will be lowered as long diff --git a/py/gc_long_lived.c b/py/gc_long_lived.c index bd0e63d9be60a..c50bbcd83631c 100644 --- a/py/gc_long_lived.c +++ b/py/gc_long_lived.c @@ -46,7 +46,6 @@ mp_obj_fun_bc_t *make_fun_bc_long_lived(mp_obj_fun_bc_t *fun_bc, uint8_t max_dep mp_raw_code_t* raw_code = MP_OBJ_TO_PTR(fun_bc->const_table[i]); if (raw_code->kind == MP_CODE_BYTECODE) { raw_code->data.u_byte.bytecode = gc_make_long_lived((byte*) raw_code->data.u_byte.bytecode); - // TODO(tannewt): Do we actually want to recurse here? raw_code->data.u_byte.const_table = gc_make_long_lived((byte*) raw_code->data.u_byte.const_table); } ((mp_uint_t *) fun_bc->const_table)[i] = (mp_uint_t) make_obj_long_lived( @@ -56,6 +55,8 @@ mp_obj_fun_bc_t *make_fun_bc_long_lived(mp_obj_fun_bc_t *fun_bc, uint8_t max_dep fun_bc->const_table = gc_make_long_lived((mp_uint_t*) fun_bc->const_table); // extra_args stores keyword only argument default values. size_t words = gc_nbytes(fun_bc) / sizeof(mp_uint_t*); + // Functions (mp_obj_fun_bc_t) have four pointers (base, globals, bytecode and const_table) + // before the variable length extra_args so remove them from the length. for (size_t i = 0; i < words - 4; i++) { if (fun_bc->extra_args[i] == NULL) { continue;