diff --git a/main.c b/main.c
index 80b163f6079f7..1389f524b6b54 100755
--- a/main.c
+++ b/main.c
@@ -123,15 +123,15 @@ void start_mp(supervisor_allocation* heap) {
     // to recover from limit hit.  (Limit is measured in bytes.)
     mp_stack_ctrl_init();
 
-    if (stack_alloc != NULL) {
-        mp_stack_set_limit(stack_alloc->length - 1024);
+    if (stack_get_bottom() != NULL) {
+        mp_stack_set_limit(stack_get_length() - 1024);
     }
 
 
 #if MICROPY_MAX_STACK_USAGE
     // _ezero (same as _ebss) is an int, so start 4 bytes above it.
-    if (stack_alloc != NULL) {
-        mp_stack_set_bottom(stack_alloc->ptr);
+    if (stack_get_bottom() != NULL) {
+        mp_stack_set_bottom(stack_get_bottom());
         mp_stack_fill_with_sentinel();
     }
 #endif
@@ -148,7 +148,7 @@ void start_mp(supervisor_allocation* heap) {
     #endif
 
     #if MICROPY_ENABLE_GC
-    gc_init(heap->ptr, heap->ptr + heap->length / 4);
+    gc_init(heap->ptr, heap->ptr + get_allocation_length(heap) / 4);
     #endif
     mp_init();
     mp_obj_list_init(mp_sys_path, 0);
@@ -451,9 +451,6 @@ int __attribute__((used)) main(void) {
     // initialise the cpu and peripherals
     safe_mode_t safe_mode = port_init();
 
-    // Init memory after the port in case the port needs to set aside memory.
-    memory_init();
-
     // Turn on LEDs
     init_status_leds();
     rgb_led_status_init();
diff --git a/ports/atmel-samd/supervisor/port.c b/ports/atmel-samd/supervisor/port.c
index d65d098257fc7..fc1d1198e2f45 100644
--- a/ports/atmel-samd/supervisor/port.c
+++ b/ports/atmel-samd/supervisor/port.c
@@ -390,8 +390,8 @@ void reset_cpu(void) {
     reset();
 }
 
-supervisor_allocation* port_fixed_stack(void) {
-    return NULL;
+bool port_has_fixed_stack(void) {
+    return false;
 }
 
 uint32_t *port_stack_get_limit(void) {
diff --git a/ports/cxd56/supervisor/port.c b/ports/cxd56/supervisor/port.c
index 086c2d198ec3a..d69f357799a1a 100644
--- a/ports/cxd56/supervisor/port.c
+++ b/ports/cxd56/supervisor/port.c
@@ -98,12 +98,8 @@ void reset_to_bootloader(void) {
     }
 }
 
-supervisor_allocation _fixed_stack;
-
-supervisor_allocation* port_fixed_stack(void) {
-    _fixed_stack.ptr = port_stack_get_limit();
-    _fixed_stack.length = (port_stack_get_top() - port_stack_get_limit()) * sizeof(uint32_t);
-    return &_fixed_stack;
+bool port_has_fixed_stack(void) {
+    return true;
 }
 
 uint32_t *port_stack_get_limit(void) {
diff --git a/ports/esp32s2/supervisor/port.c b/ports/esp32s2/supervisor/port.c
index aff7dbda4dcdd..264bdee9749ab 100644
--- a/ports/esp32s2/supervisor/port.c
+++ b/ports/esp32s2/supervisor/port.c
@@ -193,12 +193,8 @@ uint32_t *port_stack_get_top(void) {
     return port_stack_get_limit() + ESP_TASK_MAIN_STACK / (sizeof(uint32_t) / sizeof(StackType_t));
 }
 
-supervisor_allocation _fixed_stack;
-
-supervisor_allocation* port_fixed_stack(void) {
-    _fixed_stack.ptr = port_stack_get_limit();
-    _fixed_stack.length = (port_stack_get_top() - port_stack_get_limit()) * sizeof(uint32_t);
-    return &_fixed_stack;
+bool port_has_fixed_stack(void) {
+    return true;
 }
 
 // Place the word to save just after our BSS section that gets blanked.
diff --git a/ports/litex/supervisor/port.c b/ports/litex/supervisor/port.c
index 02617b9af7deb..f5c362ea6e6de 100644
--- a/ports/litex/supervisor/port.c
+++ b/ports/litex/supervisor/port.c
@@ -98,8 +98,8 @@ void reset_cpu(void) {
     for(;;) {}
 }
 
-supervisor_allocation* port_fixed_stack(void) {
-    return NULL;
+bool port_has_fixed_stack(void) {
+    return false;
 }
 
 uint32_t *port_heap_get_bottom(void) {
diff --git a/ports/mimxrt10xx/supervisor/port.c b/ports/mimxrt10xx/supervisor/port.c
index e3fef373f8154..1be2b10396c92 100644
--- a/ports/mimxrt10xx/supervisor/port.c
+++ b/ports/mimxrt10xx/supervisor/port.c
@@ -334,11 +334,8 @@ uint32_t *port_stack_get_top(void) {
     return &_ld_stack_top;
 }
 
-supervisor_allocation _fixed_stack;
-supervisor_allocation* port_fixed_stack(void) {
-    _fixed_stack.ptr = port_stack_get_limit();
-    _fixed_stack.length = (port_stack_get_top() - port_stack_get_limit()) * sizeof(uint32_t);
-    return &_fixed_stack;
+bool port_has_fixed_stack(void) {
+    return true;
 }
 
 uint32_t *port_heap_get_bottom(void) {
diff --git a/ports/nrf/supervisor/port.c b/ports/nrf/supervisor/port.c
index 493de43e0f024..5f1c9f1ba9195 100644
--- a/ports/nrf/supervisor/port.c
+++ b/ports/nrf/supervisor/port.c
@@ -251,8 +251,8 @@ uint32_t *port_heap_get_top(void) {
     return port_stack_get_top();
 }
 
-supervisor_allocation* port_fixed_stack(void) {
-    return NULL;
+bool port_has_fixed_stack(void) {
+    return false;
 }
 
 uint32_t *port_stack_get_limit(void) {
diff --git a/ports/stm/supervisor/port.c b/ports/stm/supervisor/port.c
index a8aab00ff25e3..dba1cf21ee983 100644
--- a/ports/stm/supervisor/port.c
+++ b/ports/stm/supervisor/port.c
@@ -267,8 +267,8 @@ uint32_t *port_heap_get_top(void) {
     return &_ld_heap_end;
 }
 
-supervisor_allocation* port_fixed_stack(void) {
-    return NULL;
+bool port_has_fixed_stack(void) {
+    return false;
 }
 
 uint32_t *port_stack_get_limit(void) {
diff --git a/py/circuitpy_mpconfig.h b/py/circuitpy_mpconfig.h
index 28fd4095c4621..34ea9b022fa2d 100644
--- a/py/circuitpy_mpconfig.h
+++ b/py/circuitpy_mpconfig.h
@@ -858,17 +858,20 @@ extern const struct _mp_obj_module_t wifi_module;
 
 #include "supervisor/flash_root_pointers.h"
 
+// From supervisor/memory.c
+struct _supervisor_allocation_node;
+
 #define CIRCUITPY_COMMON_ROOT_POINTERS \
     const char *readline_hist[8]; \
     vstr_t *repl_line; \
     mp_obj_t rtc_time_source; \
     GAMEPAD_ROOT_POINTERS \
     mp_obj_t pew_singleton; \
-    mp_obj_t terminal_tilegrid_tiles; \
     BOARD_UART_ROOT_POINTER \
     FLASH_ROOT_POINTERS \
     MEMORYMONITOR_ROOT_POINTERS \
     NETWORK_ROOT_POINTERS \
+    struct _supervisor_allocation_node* first_embedded_allocation; \
 
 void supervisor_run_background_tasks_if_tick(void);
 #define RUN_BACKGROUND_TASKS (supervisor_run_background_tasks_if_tick())
diff --git a/shared-module/rgbmatrix/RGBMatrix.c b/shared-module/rgbmatrix/RGBMatrix.c
index 94c3eda27f3ae..a09767b62267e 100644
--- a/shared-module/rgbmatrix/RGBMatrix.c
+++ b/shared-module/rgbmatrix/RGBMatrix.c
@@ -78,10 +78,10 @@ void common_hal_rgbmatrix_rgbmatrix_reconstruct(rgbmatrix_rgbmatrix_obj_t* self,
         // verify that the matrix is big enough
         mp_get_index(mp_obj_get_type(self->framebuffer), self->bufinfo.len, MP_OBJ_NEW_SMALL_INT(self->bufsize-1), false);
     } else {
-        _PM_free(self->bufinfo.buf);
-        _PM_free(self->protomatter.rgbPins);
-        _PM_free(self->protomatter.addr);
-        _PM_free(self->protomatter.screenData);
+        common_hal_rgbmatrix_free_impl(self->bufinfo.buf);
+        common_hal_rgbmatrix_free_impl(self->protomatter.rgbPins);
+        common_hal_rgbmatrix_free_impl(self->protomatter.addr);
+        common_hal_rgbmatrix_free_impl(self->protomatter.screenData);
 
         self->framebuffer = NULL;
         self->bufinfo.buf = common_hal_rgbmatrix_allocator_impl(self->bufsize);
@@ -180,9 +180,6 @@ void common_hal_rgbmatrix_rgbmatrix_deinit(rgbmatrix_rgbmatrix_obj_t* self) {
 
 void rgbmatrix_rgbmatrix_collect_ptrs(rgbmatrix_rgbmatrix_obj_t* self) {
     gc_collect_ptr(self->framebuffer);
-    gc_collect_ptr(self->protomatter.rgbPins);
-    gc_collect_ptr(self->protomatter.addr);
-    gc_collect_ptr(self->protomatter.screenData);
 }
 
 void common_hal_rgbmatrix_rgbmatrix_set_paused(rgbmatrix_rgbmatrix_obj_t* self, bool paused) {
@@ -217,18 +214,10 @@ int common_hal_rgbmatrix_rgbmatrix_get_height(rgbmatrix_rgbmatrix_obj_t* self) {
 }
 
 void *common_hal_rgbmatrix_allocator_impl(size_t sz) {
-    if (gc_alloc_possible()) {
-        return m_malloc_maybe(sz + sizeof(void*), true);
-    } else {
-        supervisor_allocation *allocation = allocate_memory(align32_size(sz), false);
-        return allocation ? allocation->ptr : NULL;
-    }
+    supervisor_allocation *allocation = allocate_memory(align32_size(sz), false, true);
+    return allocation ? allocation->ptr : NULL;
 }
 
 void common_hal_rgbmatrix_free_impl(void *ptr_in) {
-    supervisor_allocation *allocation = allocation_from_ptr(ptr_in);
-
-    if (allocation) {
-        free_memory(allocation);
-    }
+    free_memory(allocation_from_ptr(ptr_in));
 }
diff --git a/shared-module/sharpdisplay/SharpMemoryFramebuffer.c b/shared-module/sharpdisplay/SharpMemoryFramebuffer.c
index b199e98d63efc..4b92bd637ac96 100644
--- a/shared-module/sharpdisplay/SharpMemoryFramebuffer.c
+++ b/shared-module/sharpdisplay/SharpMemoryFramebuffer.c
@@ -34,32 +34,10 @@
 #include "shared-module/sharpdisplay/SharpMemoryFramebuffer.h"
 
 #include "supervisor/memory.h"
-#include "supervisor/shared/safe_mode.h"
 
 #define SHARPMEM_BIT_WRITECMD_LSB (0x80)
 #define SHARPMEM_BIT_VCOM_LSB (0x40)
 
-static void *hybrid_alloc(size_t sz) {
-    supervisor_allocation *allocation = allocate_memory(align32_size(sz), false);
-    if (allocation) {
-        memset(allocation->ptr, 0, sz);
-        return allocation->ptr;
-    }
-    if (gc_alloc_possible()) {
-        return m_malloc(sz, true);
-    }
-    reset_into_safe_mode(MEM_MANAGE);
-    return NULL; // unreached
-}
-
-static inline void hybrid_free(void *ptr_in) {
-    supervisor_allocation *allocation = allocation_from_ptr(ptr_in);
-
-    if (allocation) {
-        free_memory(allocation);
-    }
-}
-
 STATIC uint8_t bitrev(uint8_t n) {
     uint8_t r = 0;
     for(int i=0;i<8;i++) r |= ((n>>i) & 1)<<(7-i);
@@ -102,9 +80,9 @@ void common_hal_sharpdisplay_framebuffer_reset(sharpdisplay_framebuffer_obj_t *s
 }
 
 void common_hal_sharpdisplay_framebuffer_reconstruct(sharpdisplay_framebuffer_obj_t *self) {
-    if (!allocation_from_ptr(self->bufinfo.buf)) {
-        self->bufinfo.buf = NULL;
-    }
+    // Look up the allocation by the old pointer and get the new pointer from it.
+    supervisor_allocation* alloc = allocation_from_ptr(self->bufinfo.buf);
+    self->bufinfo.buf = alloc ? alloc->ptr : NULL;
 }
 
 void common_hal_sharpdisplay_framebuffer_get_bufinfo(sharpdisplay_framebuffer_obj_t *self, mp_buffer_info_t *bufinfo) {
@@ -112,7 +90,12 @@ void common_hal_sharpdisplay_framebuffer_get_bufinfo(sharpdisplay_framebuffer_ob
         int row_stride = common_hal_sharpdisplay_framebuffer_get_row_stride(self);
         int height = common_hal_sharpdisplay_framebuffer_get_height(self);
         self->bufinfo.len = row_stride * height + 2;
-        self->bufinfo.buf = hybrid_alloc(self->bufinfo.len);
+        supervisor_allocation* alloc = allocate_memory(align32_size(self->bufinfo.len), false, true);
+        if (alloc == NULL) {
+            m_malloc_fail(self->bufinfo.len);
+        }
+        self->bufinfo.buf = alloc->ptr;
+        memset(alloc->ptr, 0, self->bufinfo.len);
 
         uint8_t *data = self->bufinfo.buf;
         *data++ = SHARPMEM_BIT_WRITECMD_LSB;
@@ -123,7 +106,9 @@ void common_hal_sharpdisplay_framebuffer_get_bufinfo(sharpdisplay_framebuffer_ob
         }
         self->full_refresh = true;
     }
-    *bufinfo = self->bufinfo;
+    if (bufinfo) {
+        *bufinfo = self->bufinfo;
+    }
 }
 
 void common_hal_sharpdisplay_framebuffer_deinit(sharpdisplay_framebuffer_obj_t *self) {
@@ -137,7 +122,7 @@ void common_hal_sharpdisplay_framebuffer_deinit(sharpdisplay_framebuffer_obj_t *
 
     common_hal_reset_pin(self->chip_select.pin);
 
-    hybrid_free(self->bufinfo.buf);
+    free_memory(allocation_from_ptr(self->bufinfo.buf));
 
     memset(self, 0, sizeof(*self));
 }
@@ -154,19 +139,7 @@ void common_hal_sharpdisplay_framebuffer_construct(sharpdisplay_framebuffer_obj_
     self->height = height;
     self->baudrate = baudrate;
 
-    int row_stride = common_hal_sharpdisplay_framebuffer_get_row_stride(self);
-    self->bufinfo.len = row_stride * height + 2;
-    // re-use a supervisor allocation if possible
-    self->bufinfo.buf = hybrid_alloc(self->bufinfo.len);
-
-    uint8_t *data = self->bufinfo.buf;
-    *data++ = SHARPMEM_BIT_WRITECMD_LSB;
-
-    for(int y=0; y<self->height; y++) {
-        *data = bitrev(y+1);
-        data += row_stride;
-    }
-    self->full_refresh = true;
+    common_hal_sharpdisplay_framebuffer_get_bufinfo(self, NULL);
 }
 
 void common_hal_sharpdisplay_framebuffer_swapbuffers(sharpdisplay_framebuffer_obj_t *self, uint8_t *dirty_row_bitmask) {
@@ -271,7 +244,5 @@ const framebuffer_p_t sharpdisplay_framebuffer_proto = {
 };
 
 void common_hal_sharpdisplay_framebuffer_collect_ptrs(sharpdisplay_framebuffer_obj_t *self) {
-    gc_collect_ptr(self->framebuffer);
     gc_collect_ptr(self->bus);
-    gc_collect_ptr(self->bufinfo.buf);
 }
diff --git a/shared-module/sharpdisplay/SharpMemoryFramebuffer.h b/shared-module/sharpdisplay/SharpMemoryFramebuffer.h
index 8acacc94e1122..08966a89c1f35 100644
--- a/shared-module/sharpdisplay/SharpMemoryFramebuffer.h
+++ b/shared-module/sharpdisplay/SharpMemoryFramebuffer.h
@@ -33,7 +33,6 @@
 
 typedef struct {
     mp_obj_base_t base;
-    mp_obj_t framebuffer;
     busio_spi_obj_t* bus;
     busio_spi_obj_t inline_bus;
     digitalio_digitalinout_obj_t chip_select;
diff --git a/shared-module/usb_midi/__init__.c b/shared-module/usb_midi/__init__.c
index 73a314b9972a3..3fb3f836cd763 100644
--- a/shared-module/usb_midi/__init__.c
+++ b/shared-module/usb_midi/__init__.c
@@ -40,12 +40,12 @@ supervisor_allocation* usb_midi_allocation;
 
 void usb_midi_init(void) {
     // TODO(tannewt): Make this dynamic.
-    uint16_t tuple_size = align32_size(sizeof(mp_obj_tuple_t) + sizeof(mp_obj_t*) * 2);
-    uint16_t portin_size = align32_size(sizeof(usb_midi_portin_obj_t));
-    uint16_t portout_size = align32_size(sizeof(usb_midi_portout_obj_t));
+    size_t tuple_size = align32_size(sizeof(mp_obj_tuple_t) + sizeof(mp_obj_t*) * 2);
+    size_t portin_size = align32_size(sizeof(usb_midi_portin_obj_t));
+    size_t portout_size = align32_size(sizeof(usb_midi_portout_obj_t));
 
     // For each embedded MIDI Jack in the descriptor we create a Port
-    usb_midi_allocation = allocate_memory(tuple_size + portin_size + portout_size, false);
+    usb_midi_allocation = allocate_memory(tuple_size + portin_size + portout_size, false, false);
 
     mp_obj_tuple_t *ports = (mp_obj_tuple_t *) usb_midi_allocation->ptr;
     ports->base.type = &mp_type_tuple;
diff --git a/supervisor/memory.h b/supervisor/memory.h
index f4359ca46ecd8..0f820eac1c475 100755
--- a/supervisor/memory.h
+++ b/supervisor/memory.h
@@ -33,32 +33,45 @@
 
 #include <stdbool.h>
 #include <stdint.h>
+#include <stddef.h>
 
 typedef struct {
     uint32_t* ptr;
-    uint32_t length; // in bytes
 } supervisor_allocation;
 
 
 
-void memory_init(void);
 void free_memory(supervisor_allocation* allocation);
+
+// Find the allocation with the given ptr, NULL if not found. When called from the context of a
+// supervisor_move_memory() callback, finds the allocation that had that ptr *before* the move, but
+// the returned allocation already contains the ptr after the move.
+// When called with NULL, may return either NULL or an unused allocation whose ptr is NULL (this is
+// a feature used internally in allocate_memory to save code size). Passing the return value to
+// free_memory() is a permissible no-op in either case.
 supervisor_allocation* allocation_from_ptr(void *ptr);
+
 supervisor_allocation* allocate_remaining_memory(void);
 
 // Allocate a piece of a given length in bytes. If high_address is true then it should be allocated
 // at a lower address from the top of the stack. Otherwise, addresses will increase starting after
-// statically allocated memory.
-supervisor_allocation* allocate_memory(uint32_t length, bool high_address);
+// statically allocated memory. If movable is false, memory will be taken from outside the GC heap
+// and will stay stationary until freed. While the VM is running, this will fail unless a previous
+// allocation of exactly matching length has recently been freed. If movable is true, memory will be
+// taken from either outside or inside the GC heap, and when the VM exits, will be moved outside.
+// The ptr of the returned supervisor_allocation will change at that point. If you need to be
+// notified of that, add your own callback function at the designated place near the end of
+// supervisor_move_memory().
+supervisor_allocation* allocate_memory(uint32_t length, bool high_address, bool movable);
 
-static inline uint16_t align32_size(uint16_t size) {
-    if (size % 4 != 0) {
-        return (size & 0xfffc) + 0x4;
-    }
-    return size;
+static inline size_t align32_size(size_t size) {
+    return (size + 3) & ~3;
 }
 
-// Called after the heap is freed in case the supervisor wants to save some values.
+size_t get_allocation_length(supervisor_allocation* allocation);
+
+// Called after the GC heap is freed, transfers movable allocations from the GC heap to the
+// supervisor heap and compacts the supervisor heap.
 void supervisor_move_memory(void);
 
 #endif  // MICROPY_INCLUDED_SUPERVISOR_MEMORY_H
diff --git a/supervisor/port.h b/supervisor/port.h
index f5b3c15d1412e..5bc06bc4e1413 100644
--- a/supervisor/port.h
+++ b/supervisor/port.h
@@ -61,7 +61,8 @@ uint32_t *port_stack_get_limit(void);
 // Get stack top address
 uint32_t *port_stack_get_top(void);
 
-supervisor_allocation* port_fixed_stack(void);
+// True if stack is not located inside heap (at the top)
+bool port_has_fixed_stack(void);
 
 // Get heap bottom address
 uint32_t *port_heap_get_bottom(void);
@@ -69,8 +70,6 @@ uint32_t *port_heap_get_bottom(void);
 // Get heap top address
 uint32_t *port_heap_get_top(void);
 
-supervisor_allocation* port_fixed_heap(void);
-
 // Save and retrieve a word from memory that is preserved over reset. Used for safe mode.
 void port_set_saved_word(uint32_t);
 uint32_t port_get_saved_word(void);
diff --git a/supervisor/shared/display.c b/supervisor/shared/display.c
index a9ae25884239f..9c9c66cd7fb69 100644
--- a/supervisor/shared/display.c
+++ b/supervisor/shared/display.c
@@ -81,19 +81,21 @@ void supervisor_start_terminal(uint16_t width_px, uint16_t height_px) {
 
     uint16_t total_tiles = width_in_tiles * height_in_tiles;
 
-    // First try to allocate outside the heap. This will fail when the VM is running.
-    tilegrid_tiles = allocate_memory(align32_size(total_tiles), false);
-    uint8_t* tiles;
-    if (tilegrid_tiles == NULL) {
-        tiles = m_malloc(total_tiles, true);
-        MP_STATE_VM(terminal_tilegrid_tiles) = tiles;
-    } else {
-        tiles = (uint8_t*) tilegrid_tiles->ptr;
+    // Reuse the previous allocation if possible
+    if (tilegrid_tiles) {
+        if (get_allocation_length(tilegrid_tiles) != align32_size(total_tiles)) {
+            free_memory(tilegrid_tiles);
+            tilegrid_tiles = NULL;
+        }
     }
-
-    if (tiles == NULL) {
-        return;
+    if (!tilegrid_tiles) {
+        tilegrid_tiles = allocate_memory(align32_size(total_tiles), false, true);
+        if (!tilegrid_tiles) {
+            return;
+        }
     }
+    uint8_t* tiles = (uint8_t*) tilegrid_tiles->ptr;
+
     grid->y = tall ? blinka_bitmap.height : 0;
     grid->x = tall ? 0 : blinka_bitmap.width;
     grid->top_left_y = 0;
@@ -120,7 +122,6 @@ void supervisor_stop_terminal(void) {
     if (tilegrid_tiles != NULL) {
         free_memory(tilegrid_tiles);
         tilegrid_tiles = NULL;
-        supervisor_terminal_text_grid.inline_tiles = false;
         supervisor_terminal_text_grid.tiles = NULL;
     }
     #endif
@@ -128,20 +129,10 @@ void supervisor_stop_terminal(void) {
 
 void supervisor_display_move_memory(void) {
     #if CIRCUITPY_TERMINALIO
-    displayio_tilegrid_t* grid = &supervisor_terminal_text_grid;
-    if (MP_STATE_VM(terminal_tilegrid_tiles) != NULL &&
-        grid->tiles == MP_STATE_VM(terminal_tilegrid_tiles)) {
-        uint16_t total_tiles = grid->width_in_tiles * grid->height_in_tiles;
-
-        tilegrid_tiles = allocate_memory(align32_size(total_tiles), false);
-        if (tilegrid_tiles != NULL) {
-            memcpy(tilegrid_tiles->ptr, grid->tiles, total_tiles);
-            grid->tiles = (uint8_t*) tilegrid_tiles->ptr;
-        } else {
-            grid->tiles = NULL;
-            grid->inline_tiles = false;
-        }
-        MP_STATE_VM(terminal_tilegrid_tiles) = NULL;
+    if (tilegrid_tiles != NULL) {
+        supervisor_terminal_text_grid.tiles = (uint8_t*) tilegrid_tiles->ptr;
+    } else {
+        supervisor_terminal_text_grid.tiles = NULL;
     }
     #endif
 
diff --git a/supervisor/shared/external_flash/external_flash.c b/supervisor/shared/external_flash/external_flash.c
index 5bde7fd4855f5..e2d767235ee73 100644
--- a/supervisor/shared/external_flash/external_flash.c
+++ b/supervisor/shared/external_flash/external_flash.c
@@ -338,7 +338,7 @@ static bool allocate_ram_cache(void) {
 
     uint32_t table_size = blocks_per_sector * pages_per_block * sizeof(uint32_t);
     // Attempt to allocate outside the heap first.
-    supervisor_cache = allocate_memory(table_size + SPI_FLASH_ERASE_SIZE, false);
+    supervisor_cache = allocate_memory(table_size + SPI_FLASH_ERASE_SIZE, false, false);
     if (supervisor_cache != NULL) {
         MP_STATE_VM(flash_ram_cache) = (uint8_t **) supervisor_cache->ptr;
         uint8_t* page_start = (uint8_t *) supervisor_cache->ptr + table_size;
diff --git a/supervisor/shared/memory.c b/supervisor/shared/memory.c
index 0f96ae273409d..480c322b0112e 100755
--- a/supervisor/shared/memory.c
+++ b/supervisor/shared/memory.c
@@ -27,78 +27,111 @@
 #include "supervisor/memory.h"
 #include "supervisor/port.h"
 
-#include <stddef.h>
+#include <string.h>
 
+#include "py/gc.h"
 #include "supervisor/shared/display.h"
 
-#define CIRCUITPY_SUPERVISOR_ALLOC_COUNT (12)
-
-// Using a zero length to mark an unused allocation makes the code a bit shorter (but makes it
-// impossible to support zero-length allocations).
-#define FREE 0
+enum {
+    CIRCUITPY_SUPERVISOR_IMMOVABLE_ALLOC_COUNT =
+    // stack + heap
+    2
+#ifdef EXTERNAL_FLASH_DEVICES
+    + 1
+#endif
+#if CIRCUITPY_USB_MIDI
+    + 1
+#endif
+    ,
+    CIRCUITPY_SUPERVISOR_MOVABLE_ALLOC_COUNT =
+    0
+#if CIRCUITPY_DISPLAYIO
+    #if CIRCUITPY_TERMINALIO
+        + 1
+    #endif
+    + CIRCUITPY_DISPLAY_LIMIT * (
+        // Maximum needs of one display: max(4 if RGBMATRIX, 1 if SHARPDISPLAY, 0)
+        #if CIRCUITPY_RGBMATRIX
+            4
+        #elif CIRCUITPY_SHARPDISPLAY
+            1
+        #else
+            0
+        #endif
+    )
+#endif
+    ,
+    CIRCUITPY_SUPERVISOR_ALLOC_COUNT = CIRCUITPY_SUPERVISOR_IMMOVABLE_ALLOC_COUNT + CIRCUITPY_SUPERVISOR_MOVABLE_ALLOC_COUNT
+};
 
 // The lowest two bits of a valid length are always zero, so we can use them to mark an allocation
-// as freed by the client but not yet reclaimed into the FREE middle.
+// as a hole (freed by the client but not yet reclaimed into the free middle) and as movable.
+#define FLAGS 3
 #define HOLE 1
+#define MOVABLE 2
 
 static supervisor_allocation allocations[CIRCUITPY_SUPERVISOR_ALLOC_COUNT];
-// We use uint32_t* to ensure word (4 byte) alignment.
-uint32_t* low_address;
-uint32_t* high_address;
+supervisor_allocation* old_allocations;
 
-void memory_init(void) {
-    low_address = port_heap_get_bottom();
-    high_address = port_heap_get_top();
-}
+typedef struct _supervisor_allocation_node {
+    struct _supervisor_allocation_node* next;
+    size_t length;
+    // We use uint32_t to ensure word (4 byte) alignment.
+    uint32_t data[];
+} supervisor_allocation_node;
+
+supervisor_allocation_node* low_head;
+supervisor_allocation_node* high_head;
+
+// Intermediate (void*) is to suppress -Wcast-align warning. Alignment will always be correct
+// because this only reverses how (alloc)->ptr was obtained as &(node->data[0]).
+#define ALLOCATION_NODE(alloc) ((supervisor_allocation_node*)(void*)((char*)((alloc)->ptr) - sizeof(supervisor_allocation_node)))
 
 void free_memory(supervisor_allocation* allocation) {
-    if (allocation == NULL) {
+    if (allocation == NULL || allocation->ptr == NULL) {
         return;
     }
-    int32_t index = 0;
-    bool found = false;
-    for (index = 0; index < CIRCUITPY_SUPERVISOR_ALLOC_COUNT; index++) {
-        found = allocation == &allocations[index];
-        if (found) {
-            break;
-        }
+    supervisor_allocation_node* node = ALLOCATION_NODE(allocation);
+    if (node == low_head) {
+        do {
+            low_head = low_head->next;
+        } while (low_head != NULL && (low_head->length & HOLE));
     }
-    if (!found) {
-        // Bad!
-        // TODO(tannewt): Add a way to escape into safe mode on error.
+    else if (node == high_head) {
+        do {
+            high_head = high_head->next;
+        } while (high_head != NULL && (high_head->length & HOLE));
     }
-    if (allocation->ptr == high_address) {
-        high_address += allocation->length / 4;
-        allocation->length = FREE;
-        for (index++; index < CIRCUITPY_SUPERVISOR_ALLOC_COUNT; index++) {
-            if (!(allocations[index].length & HOLE)) {
-                break;
-            }
-            // Division automatically shifts out the HOLE bit.
-            high_address += allocations[index].length / 4;
-            allocations[index].length = FREE;
+    else {
+        // Check if it's in the list of embedded allocations.
+        supervisor_allocation_node** emb = &MP_STATE_VM(first_embedded_allocation);
+        while (*emb != NULL && *emb != node) {
+            emb = &((*emb)->next);
         }
-    } else if (allocation->ptr + allocation->length / 4 == low_address) {
-        low_address = allocation->ptr;
-        allocation->length = FREE;
-        for (index--; index >= 0; index--) {
-            if (!(allocations[index].length & HOLE)) {
-                break;
-            }
-            low_address -= allocations[index].length / 4;
-            allocations[index].length = FREE;
+        if (*emb != NULL) {
+            // Found, remove it from the list.
+            *emb = node->next;
+            m_free(node
+#if MICROPY_MALLOC_USES_ALLOCATED_SIZE
+                , sizeof(supervisor_allocation_node) + (node->length & ~FLAGS)
+#endif
+            );
+        }
+        else {
+            // Else it must be within the low or high ranges and becomes a hole.
+            node->length = ((node->length & ~FLAGS) | HOLE);
         }
-    } else {
-        // Freed memory isn't in the middle so skip updating bounds. The memory will be added to the
-        // middle when the memory to the inside is freed. We still need its length, but setting
-        // only the lowest bit is nondestructive.
-        allocation->length |= HOLE;
     }
+    allocation->ptr = NULL;
 }
 
 supervisor_allocation* allocation_from_ptr(void *ptr) {
+    // When called from the context of supervisor_move_memory() (old_allocations != NULL), search
+    // by old pointer to give clients a way of mapping from old to new pointer. But not if
+    // ptr == NULL, then the caller wants an allocation whose current ptr is NULL.
+    supervisor_allocation* list = (old_allocations && ptr) ? old_allocations : &allocations[0];
     for (size_t index = 0; index < CIRCUITPY_SUPERVISOR_ALLOC_COUNT; index++) {
-        if (allocations[index].ptr == ptr) {
+        if (list[index].ptr == ptr) {
             return &allocations[index];
         }
     }
@@ -106,50 +139,182 @@ supervisor_allocation* allocation_from_ptr(void *ptr) {
 }
 
 supervisor_allocation* allocate_remaining_memory(void) {
-    if (low_address == high_address) {
-        return NULL;
+    return allocate_memory((uint32_t)-1, false, false);
+}
+
+static supervisor_allocation_node* find_hole(supervisor_allocation_node* node, size_t length) {
+    for (; node != NULL; node = node->next) {
+        if (node->length == (length | HOLE)) {
+            break;
+        }
     }
-    return allocate_memory((high_address - low_address) * 4, false);
+    return node;
 }
 
-supervisor_allocation* allocate_memory(uint32_t length, bool high) {
+static supervisor_allocation_node* allocate_memory_node(uint32_t length, bool high, bool movable) {
+    if (CIRCUITPY_SUPERVISOR_MOVABLE_ALLOC_COUNT == 0) {
+        assert(!movable);
+    }
+    // supervisor_move_memory() must be extended before movable high-side allocations are allowed.
+    assert(!(high && movable));
+    uint32_t* low_address = low_head ? low_head->data + low_head->length / 4 : port_heap_get_bottom();
+    uint32_t* high_address = high_head ? (uint32_t*)high_head : port_heap_get_top();
+    // Special case for allocate_remaining_memory(), avoids computing low/high_address twice. A gap
+    // too small for the node header yields 0 (rejected below) rather than wrapping around.
+    if (length == (uint32_t)-1) {
+        length = (high_address - low_address) * 4 > (int32_t)sizeof(supervisor_allocation_node) ? (high_address - low_address) * 4 - sizeof(supervisor_allocation_node) : 0;
+    }
     if (length == 0 || length % 4 != 0) {
         return NULL;
     }
-    uint8_t index = 0;
-    int8_t direction = 1;
-    if (high) {
-        index = CIRCUITPY_SUPERVISOR_ALLOC_COUNT - 1;
-        direction = -1;
-    }
-    supervisor_allocation* alloc;
-    for (; index < CIRCUITPY_SUPERVISOR_ALLOC_COUNT; index += direction) {
-        alloc = &allocations[index];
-        if (alloc->length == FREE && (high_address - low_address) * 4 >= (int32_t) length) {
-            break;
+    // 1. Matching hole on the requested side?
+    supervisor_allocation_node* node = find_hole(high ? high_head : low_head, length);
+    if (!node) {
+        // 2. Enough free space in the middle?
+        if ((high_address - low_address) * 4 >= (int32_t)(sizeof(supervisor_allocation_node) + length)) {
+            if (high) {
+                high_address -= (sizeof(supervisor_allocation_node) + length) / 4;
+                node = (supervisor_allocation_node*)high_address;
+                node->next = high_head;
+                high_head = node;
+            }
+            else {
+                node = (supervisor_allocation_node*)low_address;
+                node->next = low_head;
+                low_head = node;
+            }
         }
-        // If a hole matches in length exactly, we can reuse it.
-        if (alloc->length == (length | HOLE)) {
-            alloc->length = length;
-            return alloc;
+        else {
+            // 3. Matching hole on the other side?
+            node = find_hole(high ? low_head : high_head, length);
+            if (!node) {
+                // 4. GC allocation?
+                if (movable && gc_alloc_possible()) {
+                    node = m_malloc_maybe(sizeof(supervisor_allocation_node) + length, true);
+                    if (node) {
+                        node->next = MP_STATE_VM(first_embedded_allocation);
+                        MP_STATE_VM(first_embedded_allocation) = node;
+                    }
+                }
+                if (!node) {
+                    // 5. Give up.
+                    return NULL;
+                }
+            }
         }
     }
-    if (index >= CIRCUITPY_SUPERVISOR_ALLOC_COUNT) {
+    node->length = length;
+    if (movable) {
+        node->length |= MOVABLE;
+    }
+    return node;
+}
+
+supervisor_allocation* allocate_memory(uint32_t length, bool high, bool movable) {
+    supervisor_allocation_node* node = allocate_memory_node(length, high, movable);
+    if (!node) {
         return NULL;
     }
-    if (high) {
-        high_address -= length / 4;
-        alloc->ptr = high_address;
-    } else {
-        alloc->ptr = low_address;
-        low_address += length / 4;
+    // Find the first free allocation slot by looking up the entry for a NULL pointer.
+    supervisor_allocation* alloc = allocation_from_ptr(NULL);
+    if (!alloc) {
+        // NOTE: node is leaked on this path. It should be freed again, but something is wrong
+        // anyway if clients try to make more allocations than available, so don't bother.
+        return NULL;
     }
-    alloc->length = length;
+    alloc->ptr = &(node->data[0]);  // Clients get the payload address, not the node header.
     return alloc;
 }
 
+size_t get_allocation_length(supervisor_allocation* allocation) {
+    return ALLOCATION_NODE(allocation)->length & ~FLAGS;  // Mask off the FLAGS bits stored in the length word.
+}
+
 void supervisor_move_memory(void) {
+    // This whole function is not needed when there are no movable allocations; let it be optimized
+    // out.
+    if (CIRCUITPY_SUPERVISOR_MOVABLE_ALLOC_COUNT == 0) {
+        return;
+    }
+    // This must be called immediately after freeing the heap, so that the embedded allocations, if
+    // any, are now in the free region.
+    assert(MP_STATE_VM(first_embedded_allocation) == NULL || (low_head < MP_STATE_VM(first_embedded_allocation) && MP_STATE_VM(first_embedded_allocation) < high_head));
+
+    // Save the old pointers for allocation_from_ptr().
+    supervisor_allocation old_allocations_array[CIRCUITPY_SUPERVISOR_ALLOC_COUNT];
+    memcpy(old_allocations_array, allocations, sizeof(allocations));
+
+    // Compact the low side. Traverse the list repeatedly, finding movable allocations preceded by a
+    // hole and swapping them, until no more are found. This is not the most runtime-efficient way,
+    // but probably the shortest and simplest code.
+    bool acted;
+    do {
+        acted = false;
+        supervisor_allocation_node** nodep = &low_head;
+        while (*nodep != NULL && (*nodep)->next != NULL) {
+            if (((*nodep)->length & MOVABLE) && ((*nodep)->next->length & HOLE)) {
+                supervisor_allocation_node* oldnode = *nodep;
+                supervisor_allocation_node* start = oldnode->next;
+                supervisor_allocation* alloc = allocation_from_ptr(&(oldnode->data[0]));
+                assert(alloc != NULL);
+                alloc->ptr = &(start->data[0]);  // The data will live where the hole's data area was.
+                oldnode->next = start->next;
+                size_t holelength = start->length;
+                size_t size = sizeof(supervisor_allocation_node) + (oldnode->length & ~FLAGS);
+                memmove(start, oldnode, size);  // Header and data move together; the regions may overlap.
+                supervisor_allocation_node* newhole = (supervisor_allocation_node*)(void*)((char*)start + size);
+                newhole->next = start;
+                newhole->length = holelength;
+                *nodep = newhole;  // The hole now comes before the moved node in the list.
+                acted = true;
+            }
+            nodep = &((*nodep)->next);
+        }
+    } while (acted);
+    // Any holes bubbled to the top can be absorbed into the free middle.
+    while (low_head != NULL && (low_head->length & HOLE)) {
+        low_head = low_head->next;
+    };
+
+    // Don't bother compacting the high side: there are no movable allocations and no holes there in
+    // current usage.
+
+    // Promote the embedded allocations to top-level ones, compacting them at the beginning of the
+    // now free region (or possibly in matching holes).
+    // The linked list is unordered, but allocations must be processed in order to avoid risking
+    // overwriting each other. To that end, repeatedly find the lowest element of the list, remove
+    // it from the list, and process it. This ad-hoc selection sort results in substantially shorter
+    // code than using the qsort() function from the C library.
+    while (MP_STATE_VM(first_embedded_allocation)) {
+        // First element is first candidate.
+        supervisor_allocation_node** pminnode = &MP_STATE_VM(first_embedded_allocation);
+        // Iterate from second element (if any) on.
+        for (supervisor_allocation_node** pnode = &(MP_STATE_VM(first_embedded_allocation)->next); *pnode != NULL; pnode = &(*pnode)->next) {
+            if (*pnode < *pminnode) {
+                pminnode = pnode;
+            }
+        }
+        // Remove from list.
+        supervisor_allocation_node* node = *pminnode;
+        *pminnode = node->next;
+        // Process.
+        size_t length = (node->length & ~FLAGS);
+        supervisor_allocation* alloc = allocation_from_ptr(&(node->data[0]));
+        assert(alloc != NULL);
+        // This may overwrite the header of node if it happened to be there already, but not the
+        // data.
+        supervisor_allocation_node* new_node = allocate_memory_node(length, false, true);
+        // There must be enough free space.
+        assert(new_node != NULL);
+        memmove(&(new_node->data[0]), &(node->data[0]), length);  // Copy the data only, not the old header.
+        alloc->ptr = &(new_node->data[0]);
+    }
+
+    // Notify clients that their movable allocations may have moved.
+    old_allocations = &old_allocations_array[0];
     #if CIRCUITPY_DISPLAYIO
     supervisor_display_move_memory();
     #endif
+    // Add calls to further clients here.
+    old_allocations = NULL;
 }
diff --git a/supervisor/shared/stack.c b/supervisor/shared/stack.c
index e7aa956b01612..afea204010d7e 100755
--- a/supervisor/shared/stack.c
+++ b/supervisor/shared/stack.c
@@ -34,36 +34,42 @@
 
 extern uint32_t _estack;
 
+// Requested stack size in bytes: for the next (re)allocation, and the one currently in effect.
 static uint32_t next_stack_size = CIRCUITPY_DEFAULT_STACK_SIZE;
 static uint32_t current_stack_size = 0;
-supervisor_allocation* stack_alloc = NULL;
+// Actual location (lowest address) and size in bytes; the size may be larger than requested.
+static uint32_t* stack_limit = NULL;
+static size_t stack_length = 0;
 
 #define EXCEPTION_STACK_SIZE 1024
 
 void allocate_stack(void) {
 
-    if (port_fixed_stack() != NULL) {
-        stack_alloc = port_fixed_stack();
-        current_stack_size = stack_alloc->length;
+    if (port_has_fixed_stack()) {
+        stack_limit = port_stack_get_limit();
+        stack_length = (port_stack_get_top() - stack_limit)*sizeof(uint32_t);  // Word count converted to bytes.
+        current_stack_size = stack_length;
     } else {
         mp_uint_t regs[10];
         mp_uint_t sp = cpu_get_regs_and_sp(regs);
 
         mp_uint_t c_size = (uint32_t) port_stack_get_top() - sp;
-        stack_alloc = allocate_memory(c_size + next_stack_size + EXCEPTION_STACK_SIZE, true);
+        supervisor_allocation* stack_alloc = allocate_memory(c_size + next_stack_size + EXCEPTION_STACK_SIZE, true, false);  // high=true, movable=false
         if (stack_alloc == NULL) {
-            stack_alloc = allocate_memory(c_size + CIRCUITPY_DEFAULT_STACK_SIZE + EXCEPTION_STACK_SIZE, true);
+            stack_alloc = allocate_memory(c_size + CIRCUITPY_DEFAULT_STACK_SIZE + EXCEPTION_STACK_SIZE, true, false);  // Fall back to the default size.
             current_stack_size = CIRCUITPY_DEFAULT_STACK_SIZE;
         } else {
             current_stack_size = next_stack_size;
         }
+        stack_limit = stack_alloc->ptr;  // NOTE(review): stack_alloc is still NULL here if the fallback allocation also failed - confirm that cannot happen.
+        stack_length = get_allocation_length(stack_alloc);
     }
 
-    *stack_alloc->ptr = STACK_CANARY_VALUE;
+    *stack_limit = STACK_CANARY_VALUE;  // Canary at the recorded stack limit; stack_ok() checks it.
 }
 
 inline bool stack_ok(void) {
-    return stack_alloc == NULL || *stack_alloc->ptr == STACK_CANARY_VALUE;
+    return stack_limit == NULL || *stack_limit == STACK_CANARY_VALUE;  // With no stack location recorded there is no canary to check.
 }
 
 inline void assert_heap_ok(void) {
@@ -77,18 +83,26 @@ void stack_init(void) {
 }
 
 void stack_resize(void) {
-    if (stack_alloc == NULL) {
+    if (stack_limit == NULL) {
         return;
     }
     if (next_stack_size == current_stack_size) {
-        *stack_alloc->ptr = STACK_CANARY_VALUE;
+        *stack_limit = STACK_CANARY_VALUE;  // Size unchanged: just rewrite the canary.
         return;
     }
-    free_memory(stack_alloc);
-    stack_alloc = NULL;
+    free_memory(allocation_from_ptr(stack_limit));  // NOTE(review): on a fixed-stack port stack_limit does not come from allocate_memory() - confirm a failed lookup is tolerated.
+    stack_limit = NULL;
     allocate_stack();
 }
 
+uint32_t* stack_get_bottom(void) {
+    return stack_limit;  // NULL until allocate_stack() has recorded a stack location.
+}
+
+size_t stack_get_length(void) {
+    return stack_length;  // Actual size in bytes as recorded by allocate_stack(); 0 before that.
+}
+
 void set_next_stack_size(uint32_t size) {
     next_stack_size = size;
 }
diff --git a/supervisor/shared/stack.h b/supervisor/shared/stack.h
index 7096f0b3edaf4..1c75de5f78ba0 100755
--- a/supervisor/shared/stack.h
+++ b/supervisor/shared/stack.h
@@ -31,10 +31,12 @@
 
 #include "supervisor/memory.h"
 
-extern supervisor_allocation* stack_alloc;
-
 void stack_init(void);
 void stack_resize(void);
+// Actual stack location and size; the size may be larger than requested.
+uint32_t* stack_get_bottom(void);
+size_t stack_get_length(void);
+// Next/current requested stack size.
 void set_next_stack_size(uint32_t size);
 uint32_t get_current_stack_size(void);
 bool stack_ok(void);