diff --git a/compat/mimalloc/alloc-aligned.c b/compat/mimalloc/alloc-aligned.c index 5594b6d38387d2..ce519a18c381a7 100644 --- a/compat/mimalloc/alloc-aligned.c +++ b/compat/mimalloc/alloc-aligned.c @@ -18,9 +18,9 @@ terms of the MIT license. A copy of the license can be found in the file static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { mi_assert_internal(size <= PTRDIFF_MAX); - mi_assert_internal(alignment!=0 && _mi_is_power_of_two(alignment) && alignment <= MI_ALIGNMENT_MAX); + mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); - const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` + const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)` const size_t padsize = size + MI_PADDING_SIZE; // use regular allocation if it is guaranteed to fit the alignment constraints @@ -30,17 +30,61 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* return p; } - // otherwise over-allocate - void* p = _mi_heap_malloc_zero(heap, size + alignment - 1, zero); - if (p == NULL) return NULL; + void* p; + size_t oversize; + if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { + // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page) + // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the + // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down) + if mi_unlikely(offset != 0) { + // todo: cannot support offset alignment for very large alignments yet + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); + #endif + return NULL; + } + oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); + p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block + // zero afterwards as only the area from the aligned_p may be committed! + if (p == NULL) return NULL; + } + else { + // otherwise over-allocate + oversize = size + alignment - 1; + p = _mi_heap_malloc_zero(heap, oversize, zero); + if (p == NULL) return NULL; + } // .. and align within the allocation - uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); - mi_assert_internal(adjust <= alignment); - void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); - if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); - mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask; + const uintptr_t adjust = (poffset == 0 ? 
0 : alignment - poffset); + mi_assert_internal(adjust < alignment); + void* aligned_p = (void*)((uintptr_t)p + adjust); + if (aligned_p != p) { + mi_page_set_has_aligned(_mi_ptr_page(p), true); + } + + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); + mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); + + // now zero the block if needed + if (zero && alignment > MI_ALIGNMENT_MAX) { + const ptrdiff_t diff = (uint8_t*)aligned_p - (uint8_t*)p; + const ptrdiff_t zsize = mi_page_usable_block_size(_mi_ptr_page(p)) - diff - MI_PADDING_SIZE; + if (zsize > 0) { _mi_memzero(aligned_p, zsize); } + } + + #if MI_TRACK_ENABLED + if (p != aligned_p) { + mi_track_free_size(p, oversize); + mi_track_malloc(aligned_p, size, zero); + } + else { + mi_track_resize(aligned_p, oversize, size); + } + #endif return aligned_p; } @@ -49,19 +93,21 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t { // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. mi_assert(alignment > 0); - if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) { // require power-of-two (see ) + if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } - if (mi_unlikely(alignment > MI_ALIGNMENT_MAX)) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) + /* + if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment); #endif return NULL; } - if (mi_unlikely(size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see ) + */ + if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); #endif @@ -71,18 +117,18 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check // try first if there happens to be a small block available with just the right alignment - if (mi_likely(padsize <= MI_SMALL_SIZE_MAX)) { + if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) { mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; - if (mi_likely(page->free != NULL && is_aligned)) + if mi_likely(page->free != NULL && is_aligned) { #if MI_STAT>1 mi_heap_stat_increase(heap, malloc, size); #endif - void* p = _mi_page_malloc(heap, page, padsize); // TODO: inline _mi_page_malloc + void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); - if (zero) { _mi_block_zero_init(page, p, size); } + 
mi_track_malloc(p,size,zero); return p; } } @@ -95,19 +141,19 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t // Optimized mi_heap_malloc_aligned / mi_malloc_aligned // ------------------------------------------------------ -mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); } -mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { #if !MI_PADDING // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) if (!_mi_is_power_of_two(alignment)) return NULL; - if (mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX)) + if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) #else // with padding, we can only guarantee this for fixed alignments - if (mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2))) - && size <= MI_SMALL_SIZE_MAX)) + if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2))) + && size <= MI_SMALL_SIZE_MAX) #endif { // fast path for common alignment and size @@ -122,45 +168,45 @@ mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size // Aligned Allocation // ------------------------------------------------------ -mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true); } -mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_zalloc_aligned_at(heap, size, alignment, 0); } -mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); } -mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_calloc_aligned_at(heap,count,size,alignment,0); } -mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return 
mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset); } -mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment); } -mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset); } -mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment); } -mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset); } -mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment); } @@ -207,54 +253,54 @@ static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsi return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero); } -void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false); } -void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false); } -void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true); } -void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true); } -void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(newcount, size, &total)) return NULL; return 
mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset); } -void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned(heap, p, total, alignment); } -void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); } -void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); } -void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); } -void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment); } -void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, size, alignment, offset); } -void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); } diff --git a/compat/mimalloc/alloc.c b/compat/mimalloc/alloc.c index 8f084d3ad35170..027421abf60320 100644 --- a/compat/mimalloc/alloc.c +++ b/compat/mimalloc/alloc.c @@ -12,6 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-internal.h" #include "mimalloc-atomic.h" + #include // memset, strlen #include // malloc, exit @@ -21,11 +22,11 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. 
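As a minimal sketch of the fast path described above (illustrative only, not part of the patch; all `toy_*` names are hypothetical): pop the head of the page-local free list when one is available, and fall back to a slower generic routine only when the list is empty.

#include <stddef.h>
#include <stdlib.h>

typedef struct toy_block_s { struct toy_block_s* next; } toy_block_t;
typedef struct toy_page_s  { toy_block_t* free; } toy_page_t;

/* slow-path stand-in; in mimalloc this role is played by the generic allocator */
static void* toy_alloc_generic(size_t size) { return malloc(size); }

/* fast path: pop the first block from the page-local free list,
   fall back to the generic path only when the list is empty */
static void* toy_page_alloc(toy_page_t* page, size_t size) {
  toy_block_t* const block = page->free;
  if (block == NULL) return toy_alloc_generic(size);
  page->free = block->next;  /* unlink the head of the free list */
  return block;              /* hand the block itself to the caller */
}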
-extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { +extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* const block = page->free; - if (mi_unlikely(block == NULL)) { - return _mi_malloc_generic(heap, size); + if mi_unlikely(block == NULL) { + return _mi_malloc_generic(heap, size, zero, 0); } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list @@ -33,10 +34,24 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz page->free = mi_block_next(page, block); mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); -#if (MI_DEBUG>0) - if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); } + // allow use of the block internally + // note: when tracking we need to avoid ever touching the MI_PADDING since + // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc-track.h`) + mi_track_mem_undefined(block, mi_page_usable_block_size(page)); + + // zero the block? note: we need to zero the full block size (issue #63) + if mi_unlikely(zero) { + mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) + const size_t zsize = (page->is_zero ? sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size); + _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE); + } + +#if (MI_DEBUG>0) && !MI_TRACK_ENABLED + if (!page->is_zero && !zero && !mi_page_is_huge(page)) { + memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); + } #elif (MI_SECURE!=0) - block->next = 0; // don't leak internal data + if (!zero) { block->next = 0; } // don't leak internal data #endif #if (MI_STAT>0) @@ -51,55 +66,69 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } #endif -#if (MI_PADDING > 0) && defined(MI_ENCODE_FREELIST) +#if (MI_PADDING > 0) && defined(MI_ENCODE_FREELIST) && !MI_TRACK_ENABLED mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); + #if (MI_DEBUG>1) mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess + #endif padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); padding->delta = (uint32_t)(delta); - uint8_t* fill = (uint8_t*)padding - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes - for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)padding - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? 
MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes + for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } + } #endif return block; } -// allocate a small block -extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { - mi_assert(heap!=NULL); - mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local +static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { + mi_assert(heap != NULL); + #if MI_DEBUG + const uintptr_t tid = _mi_thread_id(); + mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local + #endif mi_assert(size <= MI_SMALL_SIZE_MAX); - #if (MI_PADDING) +#if (MI_PADDING) if (size == 0) { size = sizeof(void*); } - #endif - mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE); - void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE); - mi_assert_internal(p==NULL || mi_usable_size(p) >= size); - #if MI_STAT>1 +#endif + mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); + void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); + mi_assert_internal(p == NULL || mi_usable_size(p) >= size); +#if MI_STAT>1 if (p != NULL) { if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } - #endif +#endif + mi_track_malloc(p,size,zero); return p; } -extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { +// allocate a small block +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return mi_heap_malloc_small_zero(heap, size, false); +} + +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { return mi_heap_malloc_small(mi_get_default_heap(), size); } // The main allocation function -extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { - if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { - return mi_heap_malloc_small(heap, size); +extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { + if mi_likely(size <= MI_SMALL_SIZE_MAX) { + mi_assert_internal(huge_alignment == 0); + return mi_heap_malloc_small_zero(heap, size, zero); } else { mi_assert(heap!=NULL); - mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local - void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); // note: size can overflow but it is detected in malloc_generic + mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic mi_assert_internal(p == NULL || mi_usable_size(p) >= size); #if MI_STAT>1 if (p != NULL) { @@ -107,55 +136,33 @@ extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif + mi_track_malloc(p,size,zero); return p; } } -extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { - return mi_heap_malloc(mi_get_default_heap(), size); +extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { + return 
_mi_heap_malloc_zero_ex(heap, size, zero, 0); } - -void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { - // note: we need to initialize the whole usable block size to zero, not just the requested size, - // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) - MI_UNUSED(size); - mi_assert_internal(p != NULL); - mi_assert_internal(mi_usable_size(p) >= size); // size can be zero - mi_assert_internal(_mi_ptr_page(p)==page); - if (page->is_zero && size > sizeof(mi_block_t)) { - // already zero initialized memory - ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p))); - } - else { - // otherwise memset - memset(p, 0, mi_usable_size(p)); - } +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return _mi_heap_malloc_zero(heap, size, false); } -// zero initialized small block -mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { - void* p = mi_malloc_small(size); - if (p != NULL) { - _mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again? - } - return p; +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { + return mi_heap_malloc(mi_get_default_heap(), size); } -void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { - void* p = mi_heap_malloc(heap,size); - if (zero && p != NULL) { - _mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again? - } - return p; +// zero initialized small block +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { + return mi_heap_malloc_small_zero(mi_get_default_heap(), size, true); } -extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { return _mi_heap_malloc_zero(heap, size, true); } -mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { return mi_heap_zalloc(mi_get_default_heap(),size); } @@ -188,16 +195,19 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con return false; } +#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); } + static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + bool is_double_free = false; mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? { // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? 
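The check that follows is a heuristic filter rather than an exact test: the decoded `next` field of a live block is effectively random, so only a value that is pointer-aligned and either NULL or inside the same page is treated as a possible double free, and only then is the expensive list scan performed. A self-contained sketch of that filter, with hypothetical `toy_*` helpers standing in for mimalloc's page lookup and list scan:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* a page modeled as a [start, start+size) byte range (hypothetical) */
typedef struct toy_page_s { uint8_t* start; size_t size; } toy_page_t;

static bool toy_in_page(const toy_page_t* page, const void* p) {
  return (const uint8_t*)p >= page->start && (const uint8_t*)p < page->start + page->size;
}

/* stand-in for the expensive scan over the page's free lists */
static bool toy_page_list_contains(const toy_page_t* page, const void* block) {
  (void)page; (void)block; return false;
}

/* quick filter: escalate to the expensive check only when the decoded
   `next` value is pointer-aligned and NULL or within the same page */
static bool toy_check_double_free(const toy_page_t* page, const void* block, const void* decoded_next) {
  if (((uintptr_t)decoded_next & (sizeof(void*) - 1)) != 0) return false; /* unaligned: looks like live data */
  if (decoded_next != NULL && !toy_in_page(page, decoded_next)) return false;
  return toy_page_list_contains(page, block);                             /* suspicious: confirm with the scan */
}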
// (continue in separate function to improve code generation) - return mi_check_is_double_freex(page, block); + is_double_free = mi_check_is_double_freex(page, block); } - return false; + return is_double_free; } #else static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { @@ -211,12 +221,19 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // Check for heap block overflow by setting up padding at the end of the block // --------------------------------------------------------------------------- -#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) +#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) && !MI_TRACK_ENABLED static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { *bsize = mi_page_usable_block_size(page); const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); *delta = padding->delta; - return ((uint32_t)mi_ptr_encode(page,block,page->keys) == padding->canary && *delta <= *bsize); + uint32_t canary = padding->canary; + uintptr_t keys[2]; + keys[0] = page->keys[0]; + keys[1] = page->keys[1]; + bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize); + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); + return ok; } // Return the exact usable size of a block. @@ -236,15 +253,20 @@ static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, si if (!ok) return false; mi_assert_internal(bsize >= delta); *size = bsize - delta; - uint8_t* fill = (uint8_t*)block + bsize - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes - for (size_t i = 0; i < maxpad; i++) { - if (fill[i] != MI_DEBUG_PADDING) { - *wrong = bsize - delta + i; - return false; + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)block + bsize - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes + mi_track_mem_defined(fill, maxpad); + for (size_t i = 0; i < maxpad; i++) { + if (fill[i] != MI_DEBUG_PADDING) { + *wrong = bsize - delta + i; + ok = false; + break; + } } + mi_track_mem_noaccess(fill, maxpad); } - return true; + return ok; } static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { @@ -321,6 +343,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { } #endif +#if MI_HUGE_PAGE_ABANDON #if (MI_STAT>0) // maintain stats for huge objects static void mi_stat_huge_free(const mi_page_t* page) { @@ -338,37 +361,49 @@ static void mi_stat_huge_free(const mi_page_t* page) { MI_UNUSED(page); } #endif +#endif // ------------------------------------------------------ // Free // ------------------------------------------------------ -// multi-threaded free +// multi-threaded free (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { // The padding check may access the non-thread-owned page for the key values. // that is safe as these are constant and the page won't be freed (as the block is not freed yet). 
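For readers unfamiliar with the padding scheme being adjusted here: each block carries a small trailer after its usable area, holding an encoded canary plus a `delta` that records how much of the block is padding, and the padding bytes are filled with a known value so overflows are caught on free. A simplified, self-contained sketch (the real code derives the canary from the page keys and checks only the first few fill bytes; `toy_encode` and the other names are hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define TOY_DEBUG_PADDING 0xDE

typedef struct toy_padding_s { uint32_t canary; uint32_t delta; } toy_padding_t;

/* placeholder for a keyed encoding such as mi_ptr_encode(page, block, keys) */
static uint32_t toy_encode(const void* block) { return (uint32_t)((uintptr_t)block * 2654435761u); }

/* write the trailer and fill the slack between the requested and usable size;
   assumes `usable` is suitably aligned, as mimalloc's usable block size is */
static void toy_padding_set(uint8_t* block, size_t usable, size_t requested) {
  toy_padding_t* pad = (toy_padding_t*)(block + usable);
  pad->canary = toy_encode(block);
  pad->delta  = (uint32_t)(usable - requested);
  memset(block + requested, TOY_DEBUG_PADDING, pad->delta);
}

/* returns false if the trailer or the fill bytes were overwritten */
static bool toy_padding_ok(const uint8_t* block, size_t usable) {
  const toy_padding_t* pad = (const toy_padding_t*)(block + usable);
  if (pad->canary != toy_encode(block) || pad->delta > usable) return false;
  for (uint32_t i = 0; i < pad->delta; i++) {
    if (block[usable - pad->delta + i] != TOY_DEBUG_PADDING) return false;
  }
  return true;
}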
mi_check_padding(page, block); - mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - #if (MI_DEBUG!=0) - memset(block, MI_DEBUG_FREED, mi_usable_size(block)); - #endif + mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection // huge page segments are always abandoned and can be freed immediately mi_segment_t* segment = _mi_page_segment(page); - if (segment->kind==MI_SEGMENT_HUGE) { + if (segment->kind == MI_SEGMENT_HUGE) { + #if MI_HUGE_PAGE_ABANDON + // huge page segments are always abandoned and can be freed immediately mi_stat_huge_free(page); _mi_segment_huge_page_free(segment, page, block); return; + #else + // huge pages are special as they occupy the entire segment + // as these are large we reset the memory occupied by the page so it is available to other threads + // (as the owning thread needs to actually free the memory later). + _mi_segment_huge_page_reset(segment, page, block); + #endif } + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading + if (segment->kind != MI_SEGMENT_HUGE) { // not for huge segments as we just reset the content + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + } + #endif + // Try to put the block on either the page-local thread free list, or the heap delayed free list. mi_thread_free_t tfreex; bool use_delayed; mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); do { use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); - if (mi_unlikely(use_delayed)) { + if mi_unlikely(use_delayed) { // unlikely: this only happens on the first concurrent free in a page that is in the full list tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); } @@ -379,7 +414,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc } } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); - if (mi_unlikely(use_delayed)) { + if mi_unlikely(use_delayed) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); mi_assert_internal(heap != NULL); @@ -405,20 +440,23 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { // and push it on the free list - if (mi_likely(local)) { + //const size_t bsize = mi_page_block_size(page); + if mi_likely(local) { // owning thread can free a block directly - if (mi_unlikely(mi_check_is_double_free(page, block))) return; + if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); - #if (MI_DEBUG!=0) - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED + if (!mi_page_is_huge(page)) { // huge page content may be already decommitted + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + } #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; - if (mi_unlikely(mi_page_all_free(page))) { + if mi_unlikely(mi_page_all_free(page)) { _mi_page_retire(page); } - else if (mi_unlikely(mi_page_is_in_full(page))) { + else if mi_unlikely(mi_page_is_in_full(page)) { _mi_page_unfull(page); } } @@ -437,11 +475,11 @@ mi_block_t* 
_mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p } -static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) mi_attr_noexcept { - mi_page_t* const page = _mi_segment_page_of(segment, p); +void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); - mi_stat_free(page, block); - _mi_free_block(page, local, block); + mi_stat_free(page, block); // stat_free may access the padding + mi_track_free(p); + _mi_free_block(page, is_local, block); } // Get the segment data belonging to a pointer @@ -450,65 +488,81 @@ static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool l static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) { MI_UNUSED(msg); + mi_assert(p != NULL); + #if (MI_DEBUG>0) - if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) { + if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) { _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); return NULL; } #endif mi_segment_t* const segment = _mi_ptr_segment(p); - if (mi_unlikely(segment == NULL)) return NULL; // checks also for (p==NULL) + mi_assert_internal(segment != NULL); #if (MI_DEBUG>0) - if (mi_unlikely(!mi_is_in_heap_region(p))) { - _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" - "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); - if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { - _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + if mi_unlikely(!mi_is_in_heap_region(p)) { + #if (MI_INTPTR_SIZE == 8 && defined(__linux__)) + if (((uintptr_t)p >> 40) != 0x7F) { // linux tends to align large blocks above 0x7F000000000 (issue #640) + #else + { + #endif + _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" + "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); + if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { + _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + } } } #endif #if (MI_DEBUG>0 || MI_SECURE>=4) - if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { + if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) { _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p); return NULL; } #endif + return segment; } // Free a block +// fast path written carefully to prevent spilling on the stack void mi_free(void* p) mi_attr_noexcept { + if mi_unlikely(p == NULL) return; mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); - if (mi_unlikely(segment == NULL)) return; - - mi_threadid_t tid = _mi_thread_id(); - mi_page_t* const page = _mi_segment_page_of(segment, p); - - if (mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks - // local, and not full or aligned - mi_block_t* block = (mi_block_t*)(p); - if (mi_unlikely(mi_check_is_double_free(page,block))) return; - mi_check_padding(page, block); - mi_stat_free(page, block); - #if (MI_DEBUG!=0) - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); - #endif - mi_block_set_next(page, block, page->local_free); - page->local_free = block; - if (mi_unlikely(--page->used == 0)) { // using this 
expression generates better code than: page->used--; if (mi_page_all_free(page)) - _mi_page_retire(page); + const bool is_local= (_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); + mi_page_t* const page = _mi_segment_page_of(segment, p); + + if mi_likely(is_local) { // thread-local free? + if mi_likely(page->flags.full_aligned == 0) // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) + { + mi_block_t* const block = (mi_block_t*)p; + if mi_unlikely(mi_check_is_double_free(page, block)) return; + mi_check_padding(page, block); + mi_stat_free(page, block); + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #endif + mi_track_free(p); + mi_block_set_next(page, block, page->local_free); + page->local_free = block; + if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) + _mi_page_retire(page); + } + } + else { + // page is full or contains (inner) aligned blocks; use generic path + _mi_free_generic(segment, page, true, p); } } else { - // non-local, aligned blocks, or a full page; use the more generic path - // note: recalc page in generic to improve code generation - mi_free_generic(segment, tid == segment->thread_id, p); + // not thread-local; use generic path + _mi_free_generic(segment, page, false, p); } } +// return true if successful bool _mi_free_delayed_block(mi_block_t* block) { // get segment and page const mi_segment_t* const segment = _mi_ptr_segment(block); @@ -521,7 +575,9 @@ bool _mi_free_delayed_block(mi_block_t* block) { // some blocks may end up in the page `thread_free` list with no blocks in the // heap `thread_delayed_free` list which may cause the page to be never freed! // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */); + if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) { + return false; + } // collect all other non-local frees to ensure up-to-date `used` count _mi_page_free_collect(page, false); @@ -541,10 +597,10 @@ mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t } static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { + if (p == NULL) return 0; const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); - if (segment==NULL) return 0; // also returns 0 if `p == NULL` const mi_page_t* const page = _mi_segment_page_of(segment, p); - if (mi_likely(!mi_page_has_aligned(page))) { + if mi_likely(!mi_page_has_aligned(page)) { const mi_block_t* block = (const mi_block_t*)p; return mi_page_usable_size_of(page, block); } @@ -554,28 +610,11 @@ static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noe } } -size_t mi_usable_size(const void* p) mi_attr_noexcept { +mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { return _mi_usable_size(p, "mi_usable_size"); } -// ------------------------------------------------------ -// ensure explicit external inline definitions are emitted! 
-// ------------------------------------------------------ - -#ifdef __cplusplus -void* _mi_externs[] = { - (void*)&_mi_page_malloc, - (void*)&mi_malloc, - (void*)&mi_malloc_small, - (void*)&mi_zalloc_small, - (void*)&mi_heap_malloc, - (void*)&mi_heap_zalloc, - (void*)&mi_heap_malloc_small -}; -#endif - - // ------------------------------------------------------ // Allocation extensions // ------------------------------------------------------ @@ -598,24 +637,24 @@ void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { mi_free(p); } -extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count,size,&total)) return NULL; return mi_heap_zalloc(heap,total); } -mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { return mi_heap_calloc(mi_get_default_heap(),count,size); } // Uninitialized `calloc` -extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_malloc(heap, total); } -mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { return mi_heap_mallocn(mi_get_default_heap(),count,size); } @@ -634,31 +673,40 @@ void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { } void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept { - const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL - if (mi_unlikely(newsize <= size && newsize >= (size / 2))) { + // if p == NULL then behave as malloc. + // else if size == 0 then reallocate to a zero-sized block (and don't return NULL, just as mi_malloc(0)). + // (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.) + const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL (with size 0) + if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) { // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0) // todo: adjust potential padding to reflect the new size? + mi_track_free_size(p, size); + mi_track_malloc(p,newsize,true); return p; // reallocation still fits and not more than 50% waste } void* newp = mi_heap_malloc(heap,newsize); - if (mi_likely(newp != NULL)) { + if mi_likely(newp != NULL) { if (zero && newsize > size) { // also set last word in the previous allocation to zero to ensure any padding is zero-initialized const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); memset((uint8_t*)newp + start, 0, newsize - start); } - if (mi_likely(p != NULL)) { - _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); + if mi_likely(p != NULL) { + if mi_likely(_mi_is_aligned(p, sizeof(uintptr_t))) { // a client may pass in an arbitrary pointer `p`.. + const size_t copysize = (newsize > size ? 
size : newsize); + mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. + _mi_memcpy_aligned(newp, p, copysize); + } mi_free(p); // only free the original pointer if successful } } return newp; } -void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, false); } -void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_realloc(heap, p, total); @@ -666,41 +714,41 @@ void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_a // Reallocate but free `p` on errors -void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { void* newp = mi_heap_realloc(heap, p, newsize); if (newp==NULL && p!=NULL) mi_free(p); return newp; } -void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, true); } -void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_rezalloc(heap, p, total); } -void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_realloc(mi_get_default_heap(),p,newsize); } -void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { return mi_heap_reallocn(mi_get_default_heap(),p,count,size); } // Reallocate but free `p` on errors -void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_reallocf(mi_get_default_heap(),p,newsize); } -void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_rezalloc(mi_get_default_heap(), p, newsize); } -void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { return mi_heap_recalloc(mi_get_default_heap(), p, count, size); } @@ -711,20 +759,22 @@ void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { // ------------------------------------------------------ // `strdup` using mi_malloc -mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { if (s == NULL) return NULL; size_t n = strlen(s); char* t = (char*)mi_heap_malloc(heap,n+1); - if (t != NULL) _mi_memcpy(t, s, n + 1); + if (t == NULL) return NULL; + _mi_memcpy(t, s, n); + t[n] = 0; return t; } -mi_decl_restrict char* 
mi_strdup(const char* s) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { return mi_heap_strdup(mi_get_default_heap(), s); } // `strndup` using mi_malloc -mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { if (s == NULL) return NULL; const char* end = (const char*)memchr(s, 0, n); // find end of string in the first `n` characters (returns NULL if not found) const size_t m = (end != NULL ? (size_t)(end - s) : n); // `m` is the minimum of `n` or the end-of-string @@ -736,7 +786,7 @@ mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) return t; } -mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { return mi_heap_strndup(mi_get_default_heap(),s,n); } @@ -747,7 +797,7 @@ mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { #define PATH_MAX MAX_PATH #endif #include -mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { // todo: use GetFullPathNameW to allow longer file names char buf[PATH_MAX]; DWORD res = GetFullPathNameA(fname, PATH_MAX, (resolved_name == NULL ? buf : resolved_name), NULL); @@ -765,6 +815,7 @@ mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char } } #else +/* #include // pathconf static size_t mi_path_max(void) { static size_t path_max = 0; @@ -776,24 +827,35 @@ static size_t mi_path_max(void) { } return path_max; } - +*/ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { if (resolved_name != NULL) { return realpath(fname,resolved_name); } else { - size_t n = mi_path_max(); + char* rname = realpath(fname, NULL); + if (rname == NULL) return NULL; + char* result = mi_heap_strdup(heap, rname); + free(rname); // use regular free! 
(which may be redirected to our free but that's ok) + return result; + } + /* + const size_t n = mi_path_max(); char* buf = (char*)mi_malloc(n+1); - if (buf==NULL) return NULL; + if (buf == NULL) { + errno = ENOMEM; + return NULL; + } char* rname = realpath(fname,buf); char* result = mi_heap_strndup(heap,rname,n); // ok if `rname==NULL` mi_free(buf); return result; } + */ } #endif -mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); } #endif @@ -831,8 +893,8 @@ static bool mi_try_new_handler(bool nothrow) { #else typedef void (*std_new_handler_t)(void); -#if (defined(__GNUC__) || defined(__clang__)) -std_new_handler_t __attribute((weak)) _ZSt15get_new_handlerv(void) { +#if (defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER))) // exclude clang-cl, see issue #631 +std_new_handler_t __attribute__((weak)) _ZSt15get_new_handlerv(void) { return NULL; } static std_new_handler_t mi_get_new_handler(void) { @@ -861,27 +923,53 @@ static bool mi_try_new_handler(bool nothrow) { } #endif -static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow ) { +static mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool nothrow ) { void* p = NULL; while(p == NULL && mi_try_new_handler(nothrow)) { - p = mi_malloc(size); + p = mi_heap_malloc(heap,size); } return p; } -mi_decl_restrict void* mi_new(size_t size) { - void* p = mi_malloc(size); - if (mi_unlikely(p == NULL)) return mi_try_new(size,false); +static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { + return mi_heap_try_new(mi_get_default_heap(), size, nothrow); +} + + +mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { + void* p = mi_heap_malloc(heap,size); + if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); return p; } -mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { + return mi_heap_alloc_new(mi_get_default_heap(), size); +} + + +mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { + size_t total; + if mi_unlikely(mi_count_size_overflow(count, size, &total)) { + mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc + return NULL; + } + else { + return mi_heap_alloc_new(heap,total); + } +} + +mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) { + return mi_heap_alloc_new_n(mi_get_default_heap(), size, count); +} + + +mi_decl_nodiscard mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { void* p = mi_malloc(size); - if (mi_unlikely(p == NULL)) return mi_try_new(size, true); + if mi_unlikely(p == NULL) return mi_try_new(size, true); return p; } -mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { +mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { void* p; do { p = mi_malloc_aligned(size, alignment); @@ -890,7 +978,7 @@ mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { return p; } -mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept 
{ void* p; do { p = mi_malloc_aligned(size, alignment); @@ -899,18 +987,7 @@ mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_ return p; } -mi_decl_restrict void* mi_new_n(size_t count, size_t size) { - size_t total; - if (mi_unlikely(mi_count_size_overflow(count, size, &total))) { - mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc - return NULL; - } - else { - return mi_new(total); - } -} - -void* mi_new_realloc(void* p, size_t newsize) { +mi_decl_nodiscard void* mi_new_realloc(void* p, size_t newsize) { void* q; do { q = mi_realloc(p, newsize); @@ -918,9 +995,9 @@ void* mi_new_realloc(void* p, size_t newsize) { return q; } -void* mi_new_reallocn(void* p, size_t newcount, size_t size) { +mi_decl_nodiscard void* mi_new_reallocn(void* p, size_t newcount, size_t size) { size_t total; - if (mi_unlikely(mi_count_size_overflow(newcount, size, &total))) { + if mi_unlikely(mi_count_size_overflow(newcount, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc return NULL; } @@ -928,3 +1005,23 @@ void* mi_new_reallocn(void* p, size_t newcount, size_t size) { return mi_new_realloc(p, total); } } + +// ------------------------------------------------------ +// ensure explicit external inline definitions are emitted! +// ------------------------------------------------------ + +#ifdef __cplusplus +void* _mi_externs[] = { + (void*)&_mi_page_malloc, + (void*)&_mi_heap_malloc_zero, + (void*)&_mi_heap_malloc_zero_ex, + (void*)&mi_malloc, + (void*)&mi_malloc_small, + (void*)&mi_zalloc_small, + (void*)&mi_heap_malloc, + (void*)&mi_heap_zalloc, + (void*)&mi_heap_malloc_small, + (void*)&mi_heap_alloc_new, + (void*)&mi_heap_alloc_new_n +}; +#endif diff --git a/compat/mimalloc/arena.c b/compat/mimalloc/arena.c index 567c8a93ac30c8..232bc05a74c0b1 100644 --- a/compat/mimalloc/arena.c +++ b/compat/mimalloc/arena.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2021, Microsoft Research, Daan Leijen +Copyright (c) 2019-2022, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -45,16 +45,17 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); Arena allocation ----------------------------------------------------------- */ - // Block info: bit 0 contains the `in_use` bit, the upper bits the // size in count of arena blocks. 
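The `mi_block_info_t` encoding noted in the comment above packs two fields into one word: bit 0 is the in-use flag and the remaining bits hold the size in arena blocks. A small illustrative helper set (hypothetical `toy_*` names, not part of the patch):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uintptr_t toy_block_info_t;

/* bit 0 = in-use flag, upper bits = size in arena blocks */
static toy_block_info_t toy_block_info_make(size_t block_count, bool in_use) {
  return ((toy_block_info_t)block_count << 1) | (in_use ? 1 : 0);
}
static bool   toy_block_info_in_use(toy_block_info_t info) { return (info & 1) != 0; }
static size_t toy_block_info_count(toy_block_info_t info)  { return (size_t)(info >> 1); }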
typedef uintptr_t mi_block_info_t; -#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 8MiB (must be at least MI_SEGMENT_ALIGN) -#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 4MiB -#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) +#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN) +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB +#define MI_MAX_ARENAS (64) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) // A memory arena descriptor typedef struct mi_arena_s { + mi_arena_id_t id; // arena id; 0 for non-specific + bool exclusive; // only allow allocations if specifically for this arena _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) @@ -74,24 +75,59 @@ static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 +/* ----------------------------------------------------------- + Arena id's + 0 is used for non-arena's (like OS memory) + id = arena_index + 1 +----------------------------------------------------------- */ + +static size_t mi_arena_id_index(mi_arena_id_t id) { + return (size_t)(id <= 0 ? MI_MAX_ARENAS : id - 1); +} + +static mi_arena_id_t mi_arena_id_create(size_t arena_index) { + mi_assert_internal(arena_index < MI_MAX_ARENAS); + mi_assert_internal(MI_MAX_ARENAS <= 126); + int id = (int)arena_index + 1; + mi_assert_internal(id >= 1 && id <= 127); + return id; +} + +mi_arena_id_t _mi_arena_id_none(void) { + return 0; +} + +static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { + return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || + (arena_id == req_arena_id)); +} + + /* ----------------------------------------------------------- Arena allocations get a memory id where the lower 8 bits are - the arena index +1, and the upper bits the block index. + the arena id, and the upper bits the block index. ----------------------------------------------------------- */ // Use `0` as a special id for direct OS allocated memory. #define MI_MEMID_OS 0 -static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { - mi_assert_internal(arena_index < 0xFE); +static size_t mi_arena_memid_create(mi_arena_id_t id, bool exclusive, mi_bitmap_index_t bitmap_index) { mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? - return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); + mi_assert_internal(id >= 0 && id <= 0x7F); + return ((bitmap_index << 8) | ((uint8_t)id & 0x7F) | (exclusive ? 
0x80 : 0)); } -static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { - mi_assert_internal(memid != MI_MEMID_OS); - *arena_index = (memid & 0xFF) - 1; - *bitmap_index = (memid >> 8); +static bool mi_arena_memid_indices(size_t arena_memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { + *bitmap_index = (arena_memid >> 8); + mi_arena_id_t id = (int)(arena_memid & 0x7F); + *arena_index = mi_arena_id_index(id); + return ((arena_memid & 0x80) != 0); +} + +bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) { + mi_arena_id_t id = (int)(arena_memid & 0x7F); + bool exclusive = ((arena_memid & 0x80) != 0); + return mi_arena_id_is_suitable(id, exclusive, request_arena_id); } static size_t mi_block_count_of_size(size_t size) { @@ -117,14 +153,19 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* ----------------------------------------------------------- */ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) + bool* commit, bool* large, bool* is_pinned, bool* is_zero, + mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { + MI_UNUSED(arena_index); + mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); + if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; + mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! set the dirty bits (todo: no need for an atomic op here?) void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); - *memid = mi_arena_id_create(arena_index, bitmap_index); + *memid = mi_arena_memid_create(arena->id, arena->exclusive, bitmap_index); *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); *large = arena->is_large; *is_pinned = (arena->is_large || !arena->allow_decommit); @@ -149,50 +190,63 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren return p; } -static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +// allocate from an arena with fallback to the OS +static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, + bool* is_pinned, bool* is_zero, + mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld ) { MI_UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); const size_t bcount = mi_block_count_of_size(size); - if (mi_likely(max_arena == 0)) return NULL; - mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); - - // try numa affine allocation - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? 
- (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + if mi_likely(max_arena == 0) return NULL; + mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE); + + size_t arena_index = mi_arena_id_index(req_arena_id); + if (arena_index < MI_MAX_ARENAS) { + // try a specific arena if requested + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); + if ((arena != NULL) && + (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld); + void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - return p; - } + if (p != NULL) return p; } } + else { + // try numa affine allocation + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena == NULL) break; // end reached + if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; + } + } - // try from another numa node instead.. - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - return p; + // try from another numa node instead.. + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena == NULL) break; // end reached + if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! 
+ (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; } } } return NULL; } - -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, - size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, + mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); @@ -201,50 +255,61 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* *is_pinned = false; bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` + if (large == NULL) large = &default_large; // ensure `large != NULL` const int numa_node = _mi_os_numa_node(tld); // current numa node // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) - if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN) { - void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, memid, tld); + if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { + void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; } // finally, fall back to the OS - if (mi_option_is_enabled(mi_option_limit_os_alloc)) { + if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) { errno = ENOMEM; return NULL; } *is_zero = true; *memid = MI_MEMID_OS; - void* p = _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats); - if (p != NULL) *is_pinned = *large; + void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats); + if (p != NULL) { *is_pinned = *large; } return p; } -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); +} + +void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { + if (size != NULL) *size = 0; + size_t arena_index = mi_arena_id_index(arena_id); + if (arena_index >= MI_MAX_ARENAS) return NULL; + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); + if (arena == NULL) return NULL; + if (size != NULL) *size = arena->block_count * MI_ARENA_BLOCK_SIZE; + return arena->start; } /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_os_tld_t* tld) { - mi_assert_internal(size > 0 && tld->stats != NULL); +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool 
all_committed, mi_stats_t* stats) { + mi_assert_internal(size > 0 && stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - _mi_os_free_ex(p, size, all_committed, tld->stats); + _mi_os_free_aligned(p, size, alignment, align_offset, all_committed, stats); } else { // allocated in an arena + mi_assert_internal(align_offset == 0); size_t arena_idx; size_t bitmap_idx; - mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); + mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); @@ -265,7 +330,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_o } else { mi_assert_internal(arena->blocks_committed != NULL); - _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, tld->stats); // ok if this fails + _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); } // and make it available to others again @@ -281,10 +346,11 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_o Add an arena. ----------------------------------------------------------- */ -static bool mi_arena_add(mi_arena_t* arena) { +static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); + if (arena_id != NULL) *arena_id = -1; size_t i = mi_atomic_increment_acq_rel(&mi_arena_count); if (i >= MI_MAX_ARENAS) { @@ -292,11 +358,14 @@ static bool mi_arena_add(mi_arena_t* arena) { return false; } mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); + arena->id = mi_arena_id_create(i); + if (arena_id != NULL) *arena_id = arena->id; return true; } -bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept +bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = _mi_arena_id_none(); if (size < MI_ARENA_BLOCK_SIZE) return false; if (is_large) { @@ -311,6 +380,8 @@ bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_la mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? 
if (arena == NULL) return false; + arena->id = _mi_arena_id_none(); + arena->exclusive = exclusive; arena->block_count = bcount; arena->field_count = fields; arena->start = (uint8_t*)start; @@ -335,18 +406,19 @@ bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_la _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } - mi_arena_add(arena); - return true; + return mi_arena_add(arena, arena_id); + } // Reserve a range of regular OS memory -int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block bool large = allow_large; void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main); if (start==NULL) return ENOMEM; - if (!mi_manage_os_memory(start, size, (large || commit), large, true, -1)) { + if (!mi_manage_os_memory_ex(start, size, (large || commit), large, true, -1, exclusive, arena_id)) { _mi_os_free_ex(start, size, commit, &_mi_stats_main); _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); return ENOMEM; @@ -355,6 +427,19 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noe return 0; } +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { + return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false, NULL); +} + +int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept { + return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL); +} + + +/* ----------------------------------------------------------- + Debugging +----------------------------------------------------------- */ + static size_t mi_debug_show_bitmap(const char* prefix, mi_bitmap_field_t* fields, size_t field_count ) { size_t inuse_count = 0; for (size_t i = 0; i < field_count; i++) { @@ -383,11 +468,13 @@ void mi_debug_show_arenas(void) mi_attr_noexcept { } } + /* ----------------------------------------------------------- Reserve a huge page arena. 
----------------------------------------------------------- */ // reserve at a specific numa node -int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { +int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = -1; if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); @@ -400,13 +487,16 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory(p, hsize, true, true, true, numa_node)) { + if (!mi_manage_os_memory_ex(p, hsize, true, true, true, numa_node, exclusive, arena_id)) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } return 0; } +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { + return mi_reserve_huge_os_pages_at_ex(pages, numa_node, timeout_msecs, false, NULL); +} // reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { diff --git a/compat/mimalloc/bitmap.c b/compat/mimalloc/bitmap.c index 8634b32ab13fa9..56a8c3057b46b8 100644 --- a/compat/mimalloc/bitmap.c +++ b/compat/mimalloc/bitmap.c @@ -108,6 +108,25 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel return false; } +// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled +bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, + const size_t start_field_idx, const size_t count, + mi_bitmap_pred_fun_t pred_fun, void* pred_arg, + mi_bitmap_index_t* bitmap_idx) { + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap + if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + if (pred_fun == NULL || pred_fun(*bitmap_idx, pred_arg)) { + return true; + } + // predicate returned false, unclaim and look further + _mi_bitmap_unclaim(bitmap, bitmap_fields, count, *bitmap_idx); + } + } + return false; +} + /* // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. @@ -283,7 +302,7 @@ bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitm static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) { MI_UNUSED_RELEASE(bitmap_fields); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - if (mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS)) { + if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) { *pre_mask = mi_bitmap_mask_(count, bitidx); *mid_mask = 0; *post_mask = 0; diff --git a/compat/mimalloc/bitmap.h b/compat/mimalloc/bitmap.h index e3375033a9326e..e92f07503f70e1 100644 --- a/compat/mimalloc/bitmap.h +++ b/compat/mimalloc/bitmap.h @@ -72,6 +72,10 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. 
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); +// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled +typedef bool (mi_cdecl *mi_bitmap_pred_fun_t)(mi_bitmap_index_t bitmap_idx, void* pred_arg); +bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_pred_fun_t pred_fun, void* pred_arg, mi_bitmap_index_t* bitmap_idx); + // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); diff --git a/compat/mimalloc/heap.c b/compat/mimalloc/heap.c index 42c6cfd63699db..01d100a0b1e642 100644 --- a/compat/mimalloc/heap.c +++ b/compat/mimalloc/heap.c @@ -139,9 +139,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); } - // free thread delayed blocks. + // free all current thread delayed blocks. // (if abandoning, after this there are no more thread-delayed references into the pages.) - _mi_heap_delayed_free(heap); + _mi_heap_delayed_free_all(heap); // collect retired pages _mi_heap_collect_retired(heap, force); @@ -200,13 +200,14 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -mi_heap_t* mi_heap_new(void) { +mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena( mi_arena_id_t arena_id ) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? if (heap==NULL) return NULL; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); + heap->arena_id = arena_id; _mi_random_split(&bheap->random, &heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); @@ -218,6 +219,14 @@ mi_heap_t* mi_heap_new(void) { return heap; } +mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { + return mi_heap_new_in_arena(_mi_arena_id_none()); +} + +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid) { + return _mi_arena_memid_is_suitable(memid, heap->arena_id); +} + uintptr_t _mi_heap_random_next(mi_heap_t* heap) { return _mi_random_next(&heap->random); } @@ -338,7 +347,20 @@ void mi_heap_destroy(mi_heap_t* heap) { } } - +void _mi_heap_destroy_all(void) { + mi_heap_t* bheap = mi_heap_get_backing(); + mi_heap_t* curr = bheap->tld->heaps; + while (curr != NULL) { + mi_heap_t* next = curr->next; + if (curr->no_reclaim) { + mi_heap_destroy(curr); + } + else { + _mi_heap_destroy_pages(curr); + } + curr = next; + } +} /* ----------------------------------------------------------- Safe Heap delete @@ -350,7 +372,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { if (from==NULL || from->page_count == 0) return; // reduce the size of the delayed frees - _mi_heap_delayed_free(from); + _mi_heap_delayed_free_partial(from); // transfer all pages by appending the queues; this will set a new heap field // so threads may do delayed frees in either heap for a while. 
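// Illustrative usage of the exclusive-arena API introduced by this patch (a sketch, assuming
// only the declarations added to mimalloc.h below: mi_arena_id_t, mi_reserve_os_memory_ex,
// mi_heap_new_in_arena); error handling is kept minimal.
#include <stdio.h>
#include "mimalloc.h"

static void use_exclusive_arena(void) {
  mi_arena_id_t arena_id;
  // Reserve 256 MiB of committed OS memory as an arena that only serves heaps
  // which explicitly request it (exclusive = true).
  if (mi_reserve_os_memory_ex(256 * 1024 * 1024, /*commit*/ true,
                              /*allow_large*/ false, /*exclusive*/ true,
                              &arena_id) != 0) {
    fprintf(stderr, "could not reserve arena memory\n");
    return;
  }
  // The heap draws its pages only from `arena_id`; when a specific arena is requested
  // there is no fallback to plain OS allocation (see _mi_arena_alloc_aligned).
  mi_heap_t* heap = mi_heap_new_in_arena(arena_id);
  if (heap == NULL) return;
  void* p = mi_heap_malloc(heap, 1024);
  mi_free(p);
  mi_heap_destroy(heap);  // release all of the heap's pages at once
}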
@@ -369,7 +391,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { // note: be careful here as the `heap` field in all those pages no longer point to `from`, // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a // the regular `_mi_free_delayed_block` which is safe. - _mi_heap_delayed_free(from); + _mi_heap_delayed_free_all(from); #if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353 mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL); #endif @@ -421,7 +443,7 @@ static mi_heap_t* mi_heap_of_block(const void* p) { mi_segment_t* segment = _mi_ptr_segment(p); bool valid = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(valid); - if (mi_unlikely(!valid)) return NULL; + if mi_unlikely(!valid) return NULL; return mi_page_heap(_mi_segment_page_of(segment,p)); } @@ -543,7 +565,7 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa xarea.area.reserved = page->reserved * bsize; xarea.area.committed = page->capacity * bsize; xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); - xarea.area.used = page->used * bsize; + xarea.area.used = page->used; // number of blocks in use (#553) xarea.area.block_size = ubsize; xarea.area.full_block_size = bsize; return fun(heap, &xarea, arg); diff --git a/compat/mimalloc/init.c b/compat/mimalloc/init.c index 6b2a99e47a6180..76c13a7daf497e 100644 --- a/compat/mimalloc/init.c +++ b/compat/mimalloc/init.c @@ -19,12 +19,12 @@ const mi_page_t _mi_page_empty = { false, // is_zero 0, // retire_expire NULL, // free - #if MI_ENCODE_FREELIST - { 0, 0 }, - #endif 0, // used 0, // xblock_size NULL, // local_free + #if MI_ENCODE_FREELIST + { 0, 0 }, + #endif MI_ATOMIC_VAR_INIT(0), // xthread_free MI_ATOMIC_VAR_INIT(0), // xheap NULL, NULL @@ -109,8 +109,9 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { MI_ATOMIC_VAR_INIT(NULL), 0, // tid 0, // cookie + 0, // arena id { 0, 0 }, // keys - { {0}, {0}, 0 }, + { {0}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next @@ -149,8 +150,9 @@ mi_heap_t _mi_heap_main = { MI_ATOMIC_VAR_INIT(NULL), 0, // thread id 0, // initial cookie + 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) 
- { {0x846ca68b}, {0}, 0 }, // random + { {0x846ca68b}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next heap @@ -165,8 +167,13 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL }; static void mi_heap_main_init(void) { if (_mi_heap_main.cookie == 0) { _mi_heap_main.thread_id = _mi_thread_id(); - _mi_heap_main.cookie = _mi_os_random_weak((uintptr_t)&mi_heap_main_init); - _mi_random_init(&_mi_heap_main.random); + _mi_heap_main.cookie = 1; + #if defined(_WIN32) && !defined(MI_SHARED_LIB) + _mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking + #else + _mi_random_init(&_mi_heap_main.random); + #endif + _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); } @@ -372,7 +379,11 @@ static void _mi_thread_done(mi_heap_t* default_heap); #endif static DWORD mi_fls_key = (DWORD)(-1); static void NTAPI mi_fls_done(PVOID value) { - if (value!=NULL) _mi_thread_done((mi_heap_t*)value); + mi_heap_t* heap = (mi_heap_t*)value; + if (heap != NULL) { + _mi_thread_done(heap); + FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 + } } #elif defined(MI_USE_PTHREADS) // use pthread local storage keys to detect thread ending @@ -475,7 +486,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { // -------------------------------------------------------- // Run functions on process init/done, and thread init/done // -------------------------------------------------------- -static void mi_process_done(void); +static void mi_cdecl mi_process_done(void); static bool os_preloading = true; // true until this module is initialized static bool mi_redirected = false; // true if malloc redirects to mi_malloc @@ -490,7 +501,7 @@ mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { } // Communicate with the redirection module on Windows -#if defined(_WIN32) && defined(MI_SHARED_LIB) +#if defined(_WIN32) && defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT) #ifdef __cplusplus extern "C" { #endif @@ -506,8 +517,8 @@ mi_decl_export void _mi_redirect_entry(DWORD reason) { mi_thread_done(); } } -__declspec(dllimport) bool mi_allocator_init(const char** message); -__declspec(dllimport) void mi_allocator_done(void); +__declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message); +__declspec(dllimport) void mi_cdecl mi_allocator_done(void); #ifdef __cplusplus } #endif @@ -529,12 +540,13 @@ static void mi_process_load(void) { MI_UNUSED(dummy); #endif os_preloading = false; + mi_assert_internal(_mi_is_main_thread()); #if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521) atexit(&mi_process_done); #endif _mi_options_init(); + mi_process_setup_auto_thread_done(); mi_process_init(); - //mi_stats_reset();- if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); // show message from the redirector (if present) @@ -543,6 +555,9 @@ static void mi_process_load(void) { if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { _mi_fputs(NULL,NULL,NULL,msg); } + + // reseed random + _mi_random_reinit_if_weak(&_mi_heap_main.random); } #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) @@ -569,7 +584,6 @@ void mi_process_init(void) mi_attr_noexcept { 
_mi_process_is_initialized = true; mi_process_setup_auto_thread_done(); - mi_detect_cpu_features(); _mi_os_init(); mi_heap_main_init(); @@ -577,6 +591,7 @@ void mi_process_init(void) mi_attr_noexcept { _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif _mi_verbose_message("secure level: %d\n", MI_SECURE); + _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL); mi_thread_init(); #if defined(_WIN32) && !defined(MI_SHARED_LIB) @@ -606,7 +621,7 @@ void mi_process_init(void) mi_attr_noexcept { } // Called when the process is done (through `at_exit`) -static void mi_process_done(void) { +static void mi_cdecl mi_process_done(void) { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; // ensure we are called once @@ -627,6 +642,14 @@ static void mi_process_done(void) { #endif #endif + // Forcefully release all retained memory; this can be dangerous in general if overriding regular malloc/free + // since after process_done there might still be other code running that calls `free` (like at_exit routines, + // or C-runtime termination code. + if (mi_option_is_enabled(mi_option_destroy_on_exit)) { + _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) + _mi_segment_cache_free_all(&_mi_heap_main_get()->tld->os); // release all cached segments + } + if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { mi_stats_print(NULL); } diff --git a/compat/mimalloc/mimalloc-internal.h b/compat/mimalloc/mimalloc-internal.h index de5c53b1e52f93..60845ae416d215 100644 --- a/compat/mimalloc/mimalloc-internal.h +++ b/compat/mimalloc/mimalloc-internal.h @@ -9,6 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MIMALLOC_INTERNAL_H #include "mimalloc-types.h" +#include "mimalloc-track.h" #if (MI_DEBUG>0) #define mi_trace_message(...) 
_mi_trace_message(__VA_ARGS__) @@ -59,6 +60,8 @@ void _mi_error_message(int err, const char* fmt, ...); // random.c void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_init_weak(mi_random_ctx_t* ctx); +void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx); void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); uintptr_t _mi_random_next(mi_random_ctx_t* ctx); uintptr_t _mi_heap_random_next(mi_heap_t* heap); @@ -86,26 +89,38 @@ bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); // bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); + +void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats); +void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); // arena.c -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); +mi_arena_id_t _mi_arena_id_none(void); +bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id); // "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); +void _mi_segment_cache_free_all(mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); // "segment.c" -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); + +#if MI_HUGE_PAGE_ABANDON void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, 
mi_block_t* block); +#else +void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); +#endif uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); @@ -115,16 +130,18 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* // "page.c" -void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks void _mi_page_unfull(mi_page_t* page); void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... -void _mi_heap_delayed_free(mi_heap_t* heap); +void _mi_heap_delayed_free_all(mi_heap_t* heap); +bool _mi_heap_delayed_free_partial(mi_heap_t* heap); void _mi_heap_collect_retired(mi_heap_t* heap, bool force); void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); +bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); void _mi_deferred_free(mi_heap_t* heap, bool force); @@ -138,6 +155,8 @@ uint8_t _mi_bin(size_t size); // for stats void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid); +void _mi_heap_destroy_all(void); // "stats.c" void _mi_stats_done(mi_stats_t* stats); @@ -147,12 +166,13 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); // "alloc.c" -void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; +void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); bool _mi_free_delayed_block(mi_block_t* block); -void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size); +void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration #if MI_DEBUG>1 bool _mi_page_is_valid(mi_page_t* page); @@ -164,8 +184,11 @@ bool _mi_page_is_valid(mi_page_t* page); // ------------------------------------------------------ #if defined(__GNUC__) || defined(__clang__) -#define mi_unlikely(x) __builtin_expect(!!(x),false) -#define mi_likely(x) __builtin_expect(!!(x),true) +#define mi_unlikely(x) (__builtin_expect(!!(x),false)) +#define mi_likely(x) (__builtin_expect(!!(x),true)) +#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) +#define 
mi_unlikely(x) (x) [[unlikely]] +#define mi_likely(x) (x) [[likely]] #else #define mi_unlikely(x) (x) #define mi_likely(x) (x) @@ -224,6 +247,12 @@ static inline bool _mi_is_power_of_two(uintptr_t x) { return ((x & (x - 1)) == 0); } +// Is a pointer aligned? +static inline bool _mi_is_aligned(void* p, size_t alignment) { + mi_assert_internal(alignment != 0); + return (((uintptr_t)p % alignment) == 0); +} + // Align upwards static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { mi_assert_internal(alignment != 0); @@ -289,8 +318,8 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) *total = count * size; - return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) - && size > 0 && (SIZE_MAX / size) < count); + // note: gcc/clang optimize this to directly check the overflow flag + return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); } #endif @@ -300,8 +329,10 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot *total = size; return false; } - else if (mi_unlikely(mi_mul_overflow(count, size, total))) { + else if mi_unlikely(mi_mul_overflow(count, size, total)) { + #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size); + #endif *total = SIZE_MAX; return true; } @@ -372,7 +403,7 @@ extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate static inline mi_heap_t* mi_get_default_heap(void) { #if defined(MI_TLS_SLOT) mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT); - if (mi_unlikely(heap == NULL)) { + if mi_unlikely(heap == NULL) { #ifdef __GNUC__ __asm(""); // prevent conditional load of the address of _mi_heap_empty #endif @@ -429,9 +460,12 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) { } // Segment that contains the pointer +// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE), +// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; +// therefore we align one byte before `p`. 
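// For example, if a huge aligned block starts exactly at address N*MI_SEGMENT_SIZE, its
// segment info lives MI_SEGMENT_SIZE bytes lower at (N-1)*MI_SEGMENT_SIZE: masking `p`
// directly would give N*MI_SEGMENT_SIZE and miss it, whereas ((uintptr_t)p - 1) & ~MI_SEGMENT_MASK
// gives (N-1)*MI_SEGMENT_SIZE. For any pointer that is not exactly segment-aligned,
// subtracting one does not change the result of the mask.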
static inline mi_segment_t* _mi_ptr_segment(const void* p) { - // mi_assert_internal(p != NULL); - return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK); + mi_assert_internal(p != NULL); + return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); } static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) { @@ -459,12 +493,13 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { return start; } -// Get the page containing the pointer +// Get the page containing the pointer (performance critical as it is called in mi_free) static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { + mi_assert_internal(p > (void*)segment); ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE); + mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; - mi_assert_internal(idx < segment->slice_entries); + mi_assert_internal(idx <= segment->slice_entries); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data mi_assert_internal(slice->slice_offset == 0); @@ -486,7 +521,7 @@ static inline mi_page_t* _mi_ptr_page(void* p) { static inline size_t mi_page_block_size(const mi_page_t* page) { const size_t bsize = page->xblock_size; mi_assert_internal(bsize > 0); - if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) { + if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) { return bsize; } else { @@ -496,6 +531,10 @@ static inline size_t mi_page_block_size(const mi_page_t* page) { } } +static inline bool mi_page_is_huge(const mi_page_t* page) { + return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE); +} + // Get the usable block size of a page without fixed padding. // This may still include internal padding due to alignment and rounding up size classes. static inline size_t mi_page_usable_block_size(const mi_page_t* page) { @@ -649,30 +688,36 @@ static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) { void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]); - return (mi_unlikely(p==null) ? NULL : p); + return (p==null ? NULL : p); } static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) { - uintptr_t x = (uintptr_t)(mi_unlikely(p==NULL) ? null : p); + uintptr_t x = (uintptr_t)(p==NULL ? 
null : p); return mi_rotl(x ^ keys[1], keys[0]) + keys[0]; } static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) { + mi_track_mem_defined(block,sizeof(mi_block_t)); + mi_block_t* next; #ifdef MI_ENCODE_FREELIST - return (mi_block_t*)mi_ptr_decode(null, block->next, keys); + next = (mi_block_t*)mi_ptr_decode(null, block->next, keys); #else MI_UNUSED(keys); MI_UNUSED(null); - return (mi_block_t*)block->next; + next = (mi_block_t*)block->next; #endif + mi_track_mem_noaccess(block,sizeof(mi_block_t)); + return next; } static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { + mi_track_mem_undefined(block,sizeof(mi_block_t)); #ifdef MI_ENCODE_FREELIST block->next = mi_ptr_encode(null, next, keys); #else MI_UNUSED(keys); MI_UNUSED(null); block->next = (mi_encoded_t)next; #endif + mi_track_mem_noaccess(block,sizeof(mi_block_t)); } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { @@ -680,7 +725,7 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* mi_block_t* next = mi_block_nextx(page,block,page->keys); // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? - if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { + if mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next)) { _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); next = NULL; } @@ -779,12 +824,12 @@ size_t _mi_os_numa_node_count_get(void); extern _Atomic(size_t) _mi_numa_node_count; static inline int _mi_os_numa_node(mi_os_tld_t* tld) { - if (mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1)) return 0; + if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } else return _mi_os_numa_node_get(tld); } static inline size_t _mi_os_numa_node_count(void) { const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); - if (mi_likely(count>0)) return count; + if mi_likely(count > 0) { return count; } else return _mi_os_numa_node_count_get(); } @@ -1003,7 +1048,7 @@ static inline size_t mi_bsr(uintptr_t x) { // (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. // --------------------------------------------------------------------------------- -#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) +#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include #include extern bool _mi_cpu_has_fsrm; @@ -1012,7 +1057,15 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) { __movsb((unsigned char*)dst, (const unsigned char*)src, n); } else { - memcpy(dst, src, n); // todo: use noinline? 
+ memcpy(dst, src, n); + } +} +static inline void _mi_memzero(void* dst, size_t n) { + if (_mi_cpu_has_fsrm) { + __stosb((unsigned char*)dst, 0, n); + } + else { + memset(dst, 0, n); } } #else @@ -1020,6 +1073,9 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) { static inline void _mi_memcpy(void* dst, const void* src, size_t n) { memcpy(dst, src, n); } +static inline void _mi_memzero(void* dst, size_t n) { + memset(dst, 0, n); +} #endif @@ -1037,12 +1093,23 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE); _mi_memcpy(adst, asrc, n); } + +static inline void _mi_memzero_aligned(void* dst, size_t n) { + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); + _mi_memzero(adst, n); +} #else // Default fallback on `_mi_memcpy` static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); _mi_memcpy(dst, src, n); } + +static inline void _mi_memzero_aligned(void* dst, size_t n) { + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); + _mi_memzero(dst, n); +} #endif diff --git a/compat/mimalloc/mimalloc-track.h b/compat/mimalloc/mimalloc-track.h new file mode 100644 index 00000000000000..f60d7acd0b8fcd --- /dev/null +++ b/compat/mimalloc/mimalloc-track.h @@ -0,0 +1,62 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_TRACK_H +#define MIMALLOC_TRACK_H + +// ------------------------------------------------------ +// Track memory ranges with macros for tools like Valgrind +// address sanitizer, or other memory checkers. 
+// ------------------------------------------------------ + +#if MI_VALGRIND + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_TOOL "valgrind" + +#include +#include + +#define mi_track_malloc(p,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero) +#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) +#define mi_track_free(p) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/) +#define mi_track_free_size(p,_size) mi_track_free(p) +#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size) +#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size) +#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) + +#elif MI_ASAN + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_TOOL "asan" + +#include + +#define mi_track_malloc(p,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_resize(p,oldsize,newsize) ASAN_POISON_MEMORY_REGION(p,oldsize); ASAN_UNPOISON_MEMORY_REGION(p,newsize) +#define mi_track_free(p) ASAN_POISON_MEMORY_REGION(p,mi_usable_size(p)) +#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size) +#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) + +#else + +#define MI_TRACK_ENABLED 0 +#define MI_TRACK_TOOL "none" + +#define mi_track_malloc(p,size,zero) +#define mi_track_resize(p,oldsize,newsize) +#define mi_track_free(p) +#define mi_track_free_size(p,_size) +#define mi_track_mem_defined(p,size) +#define mi_track_mem_undefined(p,size) +#define mi_track_mem_noaccess(p,size) + +#endif + +#endif diff --git a/compat/mimalloc/mimalloc-types.h b/compat/mimalloc/mimalloc-types.h index a07858e22bccc0..7467945bbc4f5f 100644 --- a/compat/mimalloc/mimalloc-types.h +++ b/compat/mimalloc/mimalloc-types.h @@ -29,6 +29,9 @@ terms of the MIT license. A copy of the license can be found in the file // Define NDEBUG in the release version to disable assertions. // #define NDEBUG +// Define MI_VALGRIND to enable valgrind support +// #define MI_VALGRIND 1 + // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). // #define MI_STAT 1 @@ -56,18 +59,25 @@ terms of the MIT license. A copy of the license can be found in the file // Reserve extra padding at the end of each block to be more resilient against heap block overflows. // The padding can detect byte-precise buffer overflow on free. -#if !defined(MI_PADDING) && (MI_DEBUG>=1) +#if !defined(MI_PADDING) && (MI_DEBUG>=1 || MI_VALGRIND) #define MI_PADDING 1 #endif // Encoded free lists allow detection of corrupted free lists // and can detect buffer overflows, modify after free, and double `free`s. -#if (MI_SECURE>=3 || MI_DEBUG>=1 || MI_PADDING > 0) +#if (MI_SECURE>=3 || MI_DEBUG>=1) #define MI_ENCODE_FREELIST 1 #endif +// We used to abandon huge pages but to eagerly deallocate if freed from another thread, +// but that makes it not possible to visit them during a heap walk or include them in a +// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from +// another thread so most memory is available until it gets properly freed by the owning thread. 
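// (With MI_HUGE_PAGE_ABANDON left undefined, freeing such a block from another thread goes
// through _mi_segment_huge_page_reset rather than _mi_segment_huge_page_free; see the
// corresponding #if/#else declarations in mimalloc-internal.h above.)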
+// #define MI_HUGE_PAGE_ABANDON 1 + + // ------------------------------------------------------ // Platform specific values // ------------------------------------------------------ @@ -132,7 +142,7 @@ typedef int32_t mi_ssize_t; #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) #if MI_INTPTR_SIZE > 4 -#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB +#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB #else #define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit #endif @@ -144,7 +154,7 @@ typedef int32_t mi_ssize_t; // Derived constants #define MI_SEGMENT_SIZE (MI_ZU(1)<= 655360) #error "mimalloc internal: define more bins" #endif -#if (MI_ALIGNMENT_MAX > MI_SEGMENT_SIZE/2) -#error "mimalloc internal: the max aligned boundary is too large for the segment size" -#endif -#if (MI_ALIGNED_MAX % MI_SEGMENT_SLICE_SIZE != 0) -#error "mimalloc internal: the max aligned boundary must be an integral multiple of the segment slice size" -#endif // Maximum slice offset (15) #define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) @@ -179,7 +183,8 @@ typedef int32_t mi_ssize_t; // blocks up to this size are always allocated aligned #define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) - +// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments +#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) // ------------------------------------------------------ @@ -269,30 +274,31 @@ typedef struct mi_page_s { // "owned" by the segment uint32_t slice_count; // slices in this page (0 if not a page) uint32_t slice_offset; // distance from the actual page data slice (0 if a page) - uint8_t is_reset : 1; // `true` if the page memory was reset - uint8_t is_committed : 1; // `true` if the page virtual memory is committed - uint8_t is_zero_init : 1; // `true` if the page was zero initialized + uint8_t is_reset : 1; // `true` if the page memory was reset + uint8_t is_committed : 1; // `true` if the page virtual memory is committed + uint8_t is_zero_init : 1; // `true` if the page was zero initialized // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized - uint8_t retire_expire : 7; // expiration count for retired blocks + uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized + uint8_t retire_expire : 7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) + uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t xblock_size; // size available in each block (always `>0`) + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + #ifdef MI_ENCODE_FREELIST uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif - uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) - uint32_t xblock_size; // size available in each block (always `>0`) - mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads 
_Atomic(uintptr_t) xheap; - struct mi_page_s* next; // next page owned by this thread with the same `block_size` - struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + struct mi_page_s* next; // next page owned by this thread with the same `block_size` + struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // 64-bit 9 words, 32-bit 12 words, (+2 for secure) #if MI_INTPTR_SIZE==8 @@ -326,7 +332,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) +#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS @@ -352,6 +358,8 @@ typedef struct mi_segment_s { bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_large; // in large/huge os pages? bool mem_is_committed; // `true` if the whole segment is eagerly committed + size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) + size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) bool allow_decommit; mi_msecs_t decommit_expire; @@ -373,9 +381,10 @@ typedef struct mi_segment_s { // layout like this to optimize access in `mi_free` mi_segment_kind_t kind; - _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` - mi_slice_t slices[MI_SLICES_PER_SEGMENT]; + _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment + + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment } mi_segment_t; @@ -409,6 +418,7 @@ typedef struct mi_random_cxt_s { uint32_t input[16]; uint32_t output[16]; int output_available; + bool weak; } mi_random_ctx_t; @@ -435,6 +445,7 @@ struct mi_heap_s { mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") _Atomic(mi_block_t*) thread_delayed_free; mi_threadid_t thread_id; // thread this heap belongs too + mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation diff --git a/compat/mimalloc/mimalloc.h b/compat/mimalloc/mimalloc.h index 41ccc62d73d182..d4e96cba4f01fa 100644 --- a/compat/mimalloc/mimalloc.h +++ b/compat/mimalloc/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 206 // major + 2 digits minor +#define MI_MALLOC_VERSION 209 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes @@ -28,6 +28,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_decl_nodiscard [[nodiscard]] #elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl #define mi_decl_nodiscard __attribute__((warn_unused_result)) +#elif defined(_HAS_NODISCARD) + #define mi_decl_nodiscard _NODISCARD #elif (_MSC_VER >= 1700) #define mi_decl_nodiscard _Check_return_ #else @@ -96,6 +98,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "git-compat-util.h" #include // bool +#include // INTPTR_MAX #ifdef __cplusplus extern "C" { @@ -167,7 +170,6 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s // Note that `alignment` always follows `size` for consistency with unaligned // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. // ------------------------------------------------------------------------------------- -#define MI_ALIGNMENT_MAX (1024*1024UL) // maximum supported alignment is 1MiB mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); @@ -276,6 +278,18 @@ mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_commit mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept; +// Experimental: heaps associated with specific memory arena's +typedef int mi_arena_id_t; +mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); +mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; + +#if MI_MALLOC_VERSION >= 200 +// Create a heap that only allocates in the specified arena +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); +#endif + // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; @@ -332,6 +346,7 @@ typedef enum mi_option_e { mi_option_allow_decommit, mi_option_segment_decommit_delay, mi_option_decommit_extend_delay, + mi_option_destroy_on_exit, _mi_option_last } mi_option_t; @@ -390,6 +405,9 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, s mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); + #ifdef __cplusplus } #endif @@ -407,7 +425,7 @@ mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, #include // std::forward #endif -template struct 
mi_stl_allocator { +template struct _mi_stl_allocator_common { typedef T value_type; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; @@ -415,6 +433,27 @@ template struct mi_stl_allocator { typedef value_type const& const_reference; typedef value_type* pointer; typedef value_type const* const_pointer; + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + template void construct(U* p, Args&& ...args) { ::new(p) U(std::forward(args)...); } + template void destroy(U* p) mi_attr_noexcept { p->~U(); } + #else + void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } + void destroy(pointer p) { p->~value_type(); } + #endif + + size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +}; + +template struct mi_stl_allocator : public _mi_stl_allocator_common { + using typename _mi_stl_allocator_common::size_type; + using typename _mi_stl_allocator_common::value_type; + using typename _mi_stl_allocator_common::pointer; template struct rebind { typedef mi_stl_allocator other; }; mi_stl_allocator() mi_attr_noexcept = default; @@ -431,24 +470,89 @@ template struct mi_stl_allocator { #endif #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 - using propagate_on_container_copy_assignment = std::true_type; - using propagate_on_container_move_assignment = std::true_type; - using propagate_on_container_swap = std::true_type; - using is_always_equal = std::true_type; - template void construct(U* p, Args&& ...args) { ::new(p) U(std::forward(args)...); } - template void destroy(U* p) mi_attr_noexcept { p->~U(); } - #else - void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } - void destroy(pointer p) { p->~value_type(); } + using is_always_equal = std::true_type; #endif - - size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } }; template bool operator==(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return true; } template bool operator!=(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return false; } + + +#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 +#include // std::shared_ptr + +// Common base class for STL allocators in a specific heap +template struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common { + using typename _mi_stl_allocator_common::size_type; + using typename _mi_stl_allocator_common::value_type; + using typename _mi_stl_allocator_common::pointer; + + _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete nor destroy the passed in heap */ + + #if (__cplusplus >= 201703L) // C++17 + mi_decl_nodiscard T* allocate(size_type count) { return static_cast(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); } + mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } + #else + mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); } + #endif + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // 
C++11 + using is_always_equal = std::false_type; + #endif + + void collect(bool force) { mi_heap_collect(this->heap.get(), force); } + template bool is_equal(const _mi_heap_stl_allocator_common& x) const { return (this->heap == x.heap); } + +protected: + std::shared_ptr heap; + template friend struct _mi_heap_stl_allocator_common; + + _mi_heap_stl_allocator_common() { + mi_heap_t* hp = mi_heap_new(); + this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ + } + _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } + template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } + +private: + static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } + static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } } +}; + +// STL allocator allocation in a specific heap +template struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common { + using typename _mi_heap_stl_allocator_common::size_type; + mi_heap_stl_allocator() : _mi_heap_stl_allocator_common() { } // creates fresh heap that is deleted when the destructor is called + mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } // no delete nor destroy on the passed in heap + template mi_heap_stl_allocator(const mi_heap_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } + + mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T* p, size_type) { mi_free(p); } + template struct rebind { typedef mi_heap_stl_allocator other; }; +}; + +template bool operator==(const mi_heap_stl_allocator& x, const mi_heap_stl_allocator& y) mi_attr_noexcept { return (x.is_equal(y)); } +template bool operator!=(const mi_heap_stl_allocator& x, const mi_heap_stl_allocator& y) mi_attr_noexcept { return (!x.is_equal(y)); } + + +// STL allocator allocation in a specific heap, where `free` does nothing and +// the heap is destroyed in one go on destruction -- use with care! +template struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common { + using typename _mi_heap_stl_allocator_common::size_type; + mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common() { } // creates fresh heap that is destroyed when the destructor is called + mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } // no delete nor destroy on the passed in heap + template mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } + + mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T*, size_type) { /* do nothing as we destroy the heap on destruct. 
*/ } + template struct rebind { typedef mi_heap_destroy_stl_allocator other; }; +}; + +template bool operator==(const mi_heap_destroy_stl_allocator& x, const mi_heap_destroy_stl_allocator& y) mi_attr_noexcept { return (x.is_equal(y)); } +template bool operator!=(const mi_heap_destroy_stl_allocator& x, const mi_heap_destroy_stl_allocator& y) mi_attr_noexcept { return (!x.is_equal(y)); } + +#endif // C++11 + #endif // __cplusplus #endif diff --git a/compat/mimalloc/options.c b/compat/mimalloc/options.c index 3c68bff00d4d16..ebb227da14823a 100644 --- a/compat/mimalloc/options.c +++ b/compat/mimalloc/options.c @@ -94,7 +94,8 @@ static mi_option_desc_t options[_mi_option_last] = { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 2, UNINIT, MI_OPTION(decommit_extend_delay) } + { 1, UNINIT, MI_OPTION(decommit_extend_delay) }, + { 0, UNINIT, MI_OPTION(destroy_on_exit)} // release all OS memory on process exit; careful with dangling pointer or after-exit frees! }; static void mi_option_init(mi_option_desc_t* desc); @@ -106,7 +107,8 @@ void _mi_options_init(void) { for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; long l = mi_option_get(option); MI_UNUSED(l); // initialize - if (option != mi_option_verbose) { + // if (option != mi_option_verbose) + { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } @@ -120,7 +122,7 @@ mi_decl_nodiscard long mi_option_get(mi_option_t option) { if (option < 0 || option >= _mi_option_last) return 0; mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option - if (mi_unlikely(desc->init == UNINIT)) { + if mi_unlikely(desc->init == UNINIT) { mi_option_init(desc); } return desc->value; @@ -170,7 +172,7 @@ void mi_option_disable(mi_option_t option) { } -static void mi_out_stderr(const char* msg, void* arg) { +static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { MI_UNUSED(arg); if (msg == NULL) return; #ifdef _WIN32 @@ -179,20 +181,26 @@ static void mi_out_stderr(const char* msg, void* arg) { if (!_mi_preloading()) { // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; - static int write_to_console; + static bool hconIsConsole; if (hcon == INVALID_HANDLE_VALUE) { CONSOLE_SCREEN_BUFFER_INFO sbi; hcon = GetStdHandle(STD_ERROR_HANDLE); - write_to_console = GetConsoleScreenBufferInfo(hcon, &sbi) ? 
1 : 0; - } - if (!write_to_console) { - fputs(msg, stderr); - return; + hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); } const size_t len = strlen(msg); - if (hcon != INVALID_HANDLE_VALUE && len > 0 && len < UINT32_MAX) { + if (len > 0 && len < UINT32_MAX) { DWORD written = 0; - WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + if (hconIsConsole) { + WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + } + else if (hcon != INVALID_HANDLE_VALUE) { + // use direct write if stderr was redirected + WriteFile(hcon, msg, (DWORD)len, &written, NULL); + } + else { + // finally fall back to fputs after all + fputs(msg, stderr); + } } } #else @@ -210,7 +218,7 @@ static void mi_out_stderr(const char* msg, void* arg) { static char out_buf[MI_MAX_DELAY_OUTPUT+1]; static _Atomic(size_t) out_len; -static void mi_out_buf(const char* msg, void* arg) { +static void mi_cdecl mi_out_buf(const char* msg, void* arg) { MI_UNUSED(arg); if (msg==NULL) return; if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; @@ -242,7 +250,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { // Once this module is loaded, switch to this routine // which outputs to stderr and the delayed output buffer. -static void mi_out_buf_stderr(const char* msg, void* arg) { +static void mi_cdecl mi_out_buf_stderr(const char* msg, void* arg) { mi_out_stderr(msg,arg); mi_out_buf(msg,arg); } @@ -487,13 +495,6 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { return false; } #else -static inline int mi_strnicmp(const char* s, const char* t, size_t n) { - if (n==0) return 0; - for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { - if (toupper(*s) != toupper(*t)) break; - } - return (n==0 ? 0 : *s - *t); -} #if defined _WIN32 // On Windows use GetEnvironmentVariable instead of getenv to work // reliably even when this is invoked before the C runtime is initialized. @@ -519,6 +520,13 @@ static char** mi_get_environ(void) { return environ; } #endif +static int mi_strnicmp(const char* s, const char* t, size_t n) { + if (n == 0) return 0; + for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { + if (toupper(*s) != toupper(*t)) break; + } + return (n == 0 ? 0 : *s - *t); +} static bool mi_getenv(const char* name, char* result, size_t result_size) { if (name==NULL) return false; const size_t len = strlen(name); diff --git a/compat/mimalloc/os.c b/compat/mimalloc/os.c index e3e5f84eeb6871..3503f8b28fdd22 100644 --- a/compat/mimalloc/os.c +++ b/compat/mimalloc/os.c @@ -122,7 +122,7 @@ size_t _mi_os_good_alloc_size(size_t size) { else if (size < 8*MI_MiB) align_size = 256*MI_KiB; else if (size < 32*MI_MiB) align_size = 1*MI_MiB; else align_size = 4*MI_MiB; - if (mi_unlikely(size >= (SIZE_MAX - align_size))) return size; // possible overflow? + if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } @@ -365,9 +365,9 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside // the memory region returned by VirtualAlloc; in that case we need to free using // the start of the region. 
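Stepping back to the heap-backed STL allocators this patch adds to mimalloc.h above: the following is a minimal usage sketch, assuming C++11 or later and that the header is reachable as `mimalloc.h`; the container and element types are only illustrative and not part of the patch.

```
#include <vector>
#include "mimalloc.h"

int main() {
  // Each default-constructed allocator owns a fresh mi_heap_t behind a
  // std::shared_ptr; copies of the allocator share that heap, and the heap
  // is mi_heap_delete'd when the last copy goes away.
  mi_heap_stl_allocator<int> alloc;
  std::vector<int, mi_heap_stl_allocator<int>> v(alloc);
  for (int i = 0; i < 1000; i++) v.push_back(i);   // allocations go to alloc's heap

  // The "destroy" variant never frees individual blocks; the whole heap is
  // torn down at once (mi_heap_destroy) when the last copy is gone.
  mi_heap_destroy_stl_allocator<int> dalloc;
  std::vector<int, mi_heap_destroy_stl_allocator<int>> w(dalloc);
  w.assign(100, 42);
  return 0;
}
```

Since `deallocate` of the destroy variant is a no-op, it should only back containers whose lifetime ends together with the allocator, as the comment in the header warns.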
- MEMORY_BASIC_INFORMATION info = { 0, 0 }; + MEMORY_BASIC_INFORMATION info = { 0 }; VirtualQuery(addr, &info, sizeof(info)); - if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < MI_SEGMENT_SIZE) { + if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) { errcode = 0; err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); if (err) { errcode = GetLastError(); } @@ -840,7 +840,45 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ ); } +/* ----------------------------------------------------------- + OS aligned allocation with an offset. This is used + for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc + page where the object can be aligned at an offset from the start of the segment. + As we may need to overallocate, we need to free such pointers using `mi_free_aligned` + to use the actual start of the memory region. +----------------------------------------------------------- */ +void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) { + mi_assert(offset <= MI_SEGMENT_SIZE); + mi_assert(offset <= size); + mi_assert((alignment % _mi_os_page_size()) == 0); + if (offset > MI_SEGMENT_SIZE) return NULL; + if (offset == 0) { + // regular aligned allocation + return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats); + } + else { + // overallocate to align at an offset + const size_t extra = _mi_align_up(offset, alignment) - offset; + const size_t oversize = size + extra; + void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats); + if (start == NULL) return NULL; + void* p = (uint8_t*)start + extra; + mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); + // decommit the overallocation at the start + if (commit && extra > _mi_os_page_size()) { + _mi_os_decommit(start, extra, tld_stats); + } + return p; + } +} + +void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) { + mi_assert(align_offset <= MI_SEGMENT_SIZE); + const size_t extra = _mi_align_up(align_offset, alignment) - align_offset; + void* start = (uint8_t*)p - extra; + _mi_os_free_ex(start, size + extra, was_committed, tld_stats); +} /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. @@ -989,7 +1027,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! - #if (MI_DEBUG>1) + #if (MI_DEBUG>1) && !MI_TRACK_ENABLED if (MI_SECURE==0) { memset(start, 0, csize); // pretend it is eagerly reset } @@ -1044,13 +1082,8 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) 
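As a side note on `_mi_os_alloc_aligned_offset` above, here is a tiny standalone sketch of the over-allocation arithmetic it relies on; `align_up` is re-implemented purely for illustration and the constants are arbitrary examples, not values from the patch.

```
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

// Illustrative re-implementation of _mi_align_up (align must be a power of two).
static uintptr_t align_up(uintptr_t x, uintptr_t align) {
  return (x + align - 1) & ~(align - 1);
}

int main(void) {
  const uintptr_t alignment = 4u * 1024 * 1024;   // e.g. a 4MiB alignment > MI_ALIGNMENT_MAX
  const uintptr_t offset    = 16;
  const uintptr_t start     = 64u * 1024 * 1024;  // pretend the OS returned this aligned block

  const uintptr_t extra = align_up(offset, alignment) - offset;  // 4MiB - 16
  const uintptr_t p     = start + extra;                         // pointer handed to the caller

  assert(start % alignment == 0);
  assert((p + offset) % alignment == 0);  // the address at `offset` is aligned as required
  printf("extra = %zu bytes\n", (size_t)extra);
  return 0;
}
```

Freeing has to undo the same shift, which is exactly what `_mi_os_free_aligned` does by recomputing `extra` from the stored alignment and align offset.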
- } - else { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); - } + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); } */ @@ -1317,14 +1350,14 @@ static size_t mi_os_numa_nodex(void) { (*pGetCurrentProcessorNumberEx)(&pnum); USHORT nnode = 0; BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); - if (ok) numa_node = nnode; + if (ok) { numa_node = nnode; } } else if (pGetNumaProcessorNode != NULL) { // Vista or earlier, use older API that is limited to 64 processors. Issue #277 DWORD pnum = GetCurrentProcessorNumber(); UCHAR nnode = 0; BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); - if (ok) numa_node = nnode; + if (ok) { numa_node = nnode; } } return numa_node; } diff --git a/compat/mimalloc/page-queue.c b/compat/mimalloc/page-queue.c index e1a8a6a6592b86..5619a81f9917fe 100644 --- a/compat/mimalloc/page-queue.c +++ b/compat/mimalloc/page-queue.c @@ -229,8 +229,9 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(!mi_page_queue_contains(queue, page)); - + #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); + #endif mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); diff --git a/compat/mimalloc/page.c b/compat/mimalloc/page.c index 121683015de6b2..1760135545d182 100644 --- a/compat/mimalloc/page.c +++ b/compat/mimalloc/page.c @@ -112,7 +112,10 @@ bool _mi_page_is_valid(mi_page_t* page) { mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(!_mi_process_is_initialized || segment->thread_id==0 || segment->thread_id == mi_page_heap(page)->thread_id); - if (segment->kind != MI_SEGMENT_HUGE) { + #if MI_HUGE_PAGE_ABANDON + if (segment->kind != MI_SEGMENT_HUGE) + #endif + { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_in_full(page)); @@ -124,14 +127,23 @@ bool _mi_page_is_valid(mi_page_t* page) { #endif void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { + while (!_mi_page_try_use_delayed_free(page, delay, override_never)) { + mi_atomic_yield(); + } +} + +bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { mi_thread_free_t tfreex; mi_delayed_t old_delay; mi_thread_free_t tfree; + size_t yield_count = 0; do { tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); - if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { + if mi_unlikely(old_delay == MI_DELAYED_FREEING) { + if (yield_count >= 4) return false; // give up after 4 tries + yield_count++; mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. 
// tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } @@ -143,6 +155,8 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid } } while ((old_delay == MI_DELAYED_FREEING) || !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + + return true; // success } /* ----------------------------------------------------------- @@ -199,7 +213,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // and the local free list if (page->local_free != NULL) { - if (mi_likely(page->free == NULL)) { + if mi_likely(page->free == NULL) { // usual case page->free = page->local_free; page->local_free = NULL; @@ -234,7 +248,9 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); + #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); + #endif mi_assert_internal(!page->is_reset); // TODO: push on full queue immediately if it is full? mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); @@ -243,17 +259,26 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { } // allocate a fresh page from a segment -static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) { - mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq)); - mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os); +static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) { + #if !MI_HUGE_PAGE_ABANDON + mi_assert_internal(pq != NULL); + mi_assert_internal(mi_heap_contains_queue(heap, pq)); + mi_assert_internal(page_alignment > 0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || block_size == pq->block_size); + #endif + mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os); if (page == NULL) { // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) return NULL; } - mi_assert_internal(pq==NULL || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE); - mi_page_init(heap, page, block_size, heap->tld); + mi_assert_internal(page_alignment >0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE); + mi_assert_internal(pq!=NULL || page->xblock_size != 0); + mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); + // a fresh page was found, initialize it + const size_t full_block_size = ((pq == NULL || mi_page_queue_is_huge(pq)) ? 
mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc + mi_assert_internal(full_block_size >= block_size); + mi_page_init(heap, page, full_block_size, heap->tld); mi_heap_stat_increase(heap, pages, 1); - if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL + if (pq != NULL) { mi_page_queue_push(heap, pq, page); } mi_assert_expensive(_mi_page_is_valid(page)); return page; } @@ -261,7 +286,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size // Get a fresh page to use static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { mi_assert_internal(mi_heap_contains_queue(heap, pq)); - mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size); + mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0); if (page==NULL) return NULL; mi_assert_internal(pq->block_size==mi_page_block_size(page)); mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); @@ -272,10 +297,18 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { Do any delayed frees (put there by other threads if they deallocated in a full page) ----------------------------------------------------------- */ -void _mi_heap_delayed_free(mi_heap_t* heap) { +void _mi_heap_delayed_free_all(mi_heap_t* heap) { + while (!_mi_heap_delayed_free_partial(heap)) { + mi_atomic_yield(); + } +} + +// returns true if all delayed frees were processed +bool _mi_heap_delayed_free_partial(mi_heap_t* heap) { // take over the list (note: no atomic exchange since it is often NULL) mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; + bool all_freed = true; // and free them all while(block != NULL) { @@ -283,7 +316,9 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet - // reset the delayed_freeing flag; in that case delay it further by reinserting. + // reset the delayed_freeing flag; in that case delay it further by reinserting the current block + // into the delayed free list + all_freed = false; mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { mi_block_set_nextx(heap, block, dfree, heap->keys); @@ -291,6 +326,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { } block = next; } + return all_freed; } /* ----------------------------------------------------------- @@ -380,7 +416,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { } // Retire parameters -#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX +#define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX) #define MI_RETIRE_CYCLES (8) // Retire a page with no more used blocks @@ -403,7 +439,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) { + if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_queue_is_special(pq)) { // not too large && not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? 
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); page->retire_expire = 1 + (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); @@ -551,7 +587,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co #if (MI_SECURE>0) #define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many #else -#define MI_MIN_EXTEND (1) +#define MI_MIN_EXTEND (4) #endif // Extend the capacity (up to reserved) by initializing a free list @@ -619,11 +655,14 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_page_set_heap(page, heap); page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start size_t page_size; - _mi_segment_page_start(segment, page, &page_size); + const void* page_start = _mi_segment_page_start(segment, page, &page_size); + MI_UNUSED(page_start); + mi_track_mem_noaccess(page_start,page_size); mi_assert_internal(mi_page_block_size(page) <= page_size); mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); + mi_assert_internal(page->reserved > 0); #ifdef MI_ENCODE_FREELIST page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); @@ -773,21 +812,28 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex // Because huge pages contain just one block, and the segment contains // just that page, we always treat them as abandoned and any thread // that frees the block can free the whole page and segment directly. -static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { +// Huge pages are also use if the requested alignment is very large (> MI_ALIGNMENT_MAX). +static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { size_t block_size = _mi_os_good_alloc_size(size); - mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE); - bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX); + mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0); + bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX || page_alignment > 0); + #if MI_HUGE_PAGE_ABANDON mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size)); - mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size); + #else + mi_page_queue_t* pq = mi_page_queue(heap, is_huge ? MI_HUGE_BLOCK_SIZE : block_size); // not block_size as that can be low if the page_alignment > 0 + mi_assert_internal(!is_huge || mi_page_queue_is_huge(pq)); + #endif + mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment); if (page != NULL) { mi_assert_internal(mi_page_immediate_available(page)); - if (pq == NULL) { - // huge pages are directly abandoned + if (is_huge) { mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE); mi_assert_internal(_mi_page_segment(page)->used==1); + #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue mi_page_set_heap(page, NULL); + #endif } else { mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); @@ -809,16 +855,16 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { // Allocate a page // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. 
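Since this huge-page path is what lets user requests exceed `MI_ALIGNMENT_MAX`, a small user-level sketch of the newly supported case follows; the sizes and alignment values are arbitrary examples, not taken from the patch.

```
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include "mimalloc.h"

int main(void) {
  // Alignments above the old 1MiB MI_ALIGNMENT_MAX are now served from a
  // dedicated huge segment instead of being rejected.
  const size_t alignment = 16u * 1024 * 1024;            // 16 MiB
  void* p = mi_malloc_aligned(100 * 1024, alignment);
  assert(p != NULL && ((uintptr_t)p % alignment) == 0);

  // Aligned-at-offset allocation still works for regular alignments
  // (very large alignments cannot be combined with a non-zero offset).
  void* q = mi_malloc_aligned_at(4096, 4096, 16);
  assert(q != NULL && (((uintptr_t)q + 16) % 4096) == 0);

  mi_free(q);
  mi_free(p);
  return 0;
}
```

Both allocations are released with plain `mi_free`.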
-static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept { +static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept { // huge allocation? const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` - if (mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) )) { - if (mi_unlikely(req_size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see ) + if mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) { + if mi_unlikely(req_size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); return NULL; } else { - return mi_large_huge_page_alloc(heap,size); + return mi_large_huge_page_alloc(heap,size,huge_alignment); } } else { @@ -830,32 +876,34 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept { // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed. // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. -void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept +// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for +// very large requested alignments in which case we use a huge segment. +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { mi_assert_internal(heap != NULL); // initialize if necessary - if (mi_unlikely(!mi_heap_is_initialized(heap))) { + if mi_unlikely(!mi_heap_is_initialized(heap)) { mi_thread_init(); // calls `_mi_heap_init` in turn heap = mi_get_default_heap(); - if (mi_unlikely(!mi_heap_is_initialized(heap))) { return NULL; } + if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; } } mi_assert_internal(mi_heap_is_initialized(heap)); // call potential deferred free routines _mi_deferred_free(heap, false); - // free delayed frees from other threads - _mi_heap_delayed_free(heap); + // free delayed frees from other threads (but skip contended ones) + _mi_heap_delayed_free_partial(heap); // find (or allocate) a page of the right size - mi_page_t* page = mi_find_page(heap, size); - if (mi_unlikely(page == NULL)) { // first time out of memory, try to collect and retry the allocation once more + mi_page_t* page = mi_find_page(heap, size, huge_alignment); + if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more mi_heap_collect(heap, true /* force */); - page = mi_find_page(heap, size); + page = mi_find_page(heap, size, huge_alignment); } - if (mi_unlikely(page == NULL)) { // out of memory + if mi_unlikely(page == NULL) { // out of memory const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` _mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size); return NULL; @@ -864,6 +912,15 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(mi_page_block_size(page) >= size); - // and try again, this time succeeding! (i.e. this should never recurse) - return _mi_page_malloc(heap, page, size); + // and try again, this time succeeding! (i.e. 
this should never recurse through _mi_page_malloc) + if mi_unlikely(zero && page->xblock_size == 0) { + // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case. + void* p = _mi_page_malloc(heap, page, size, false); + mi_assert_internal(p != NULL); + _mi_memzero_aligned(p, mi_page_usable_block_size(page)); + return p; + } + else { + return _mi_page_malloc(heap, page, size, zero); + } } diff --git a/compat/mimalloc/random.c b/compat/mimalloc/random.c index 4e334d3dd76ebd..06d4ba4ad67a98 100644 --- a/compat/mimalloc/random.c +++ b/compat/mimalloc/random.c @@ -168,10 +168,12 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim #if defined(_WIN32) -#if defined(MI_USE_RTLGENRANDOM) || defined(__cplusplus) +#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) // We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using // dynamic overriding, we observed it can raise an exception when compiled with C++, and // sometimes deadlocks when also running under the VS debugger. +// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. +// To be continued.. #pragma comment (lib,"advapi32.lib") #define RtlGenRandom SystemFunction036 #ifdef __cplusplus @@ -185,16 +187,27 @@ static bool os_random_buf(void* buf, size_t buf_len) { return (RtlGenRandom(buf, (ULONG)buf_len) != 0); } #else -#include "compat/win32/lazyload.h" + #ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG #define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 #endif +typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); +static PBCryptGenRandom pBCryptGenRandom = NULL; + static bool os_random_buf(void* buf, size_t buf_len) { - DECLARE_PROC_ADDR(bcrypt, LONG, NTAPI, BCryptGenRandom, HANDLE, PUCHAR, ULONG, ULONG); - if (!INIT_PROC_ADDR(BCryptGenRandom)) - return 0; - return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); + if (pBCryptGenRandom == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); + if (hDll != NULL) { + pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); + } + } + if (pBCryptGenRandom == NULL) { + return false; + } + else { + return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); + } } #endif @@ -307,23 +320,41 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { return x; } -void _mi_random_init(mi_random_ctx_t* ctx) { +static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { uint8_t key[32]; - if (!os_random_buf(key, sizeof(key))) { + if (use_weak || !os_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time #if !defined(__wasi__) - _mi_warning_message("unable to use secure randomness\n"); + if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); } #endif uintptr_t x = _mi_os_random_weak(0); for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. 
x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } + ctx->weak = true; + } + else { + ctx->weak = false; } chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); } +void _mi_random_init(mi_random_ctx_t* ctx) { + mi_random_init_ex(ctx, false); +} + +void _mi_random_init_weak(mi_random_ctx_t * ctx) { + mi_random_init_ex(ctx, true); +} + +void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx) { + if (ctx->weak) { + _mi_random_init(ctx); + } +} + /* -------------------------------------------------------- test vectors from ----------------------------------------------------------- */ diff --git a/compat/mimalloc/readme.md b/compat/mimalloc/readme.md index 0db3ff6f112ca0..932ac0f178dce7 100644 --- a/compat/mimalloc/readme.md +++ b/compat/mimalloc/readme.md @@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the run-time systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.0.6` (2022-04-14). -Latest stable tag: `v1.7.6` (2022-02-14). +Latest release tag: `v2.0.9` (2022-12-23). +Latest stable tag: `v1.7.9` (2022-12-23). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -27,6 +27,8 @@ It also has an easy way to override the default allocator in [Windows](#override to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic _heartbeat_ and deferred freeing (for bounded worst-case times with reference counting). + Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS, + Linux, WASM, various BSD's, Haiku, MUSL, etc) and has excellent support for dynamic overriding. - __free list sharding__: instead of one big free list (per size class) we have many smaller lists per "mimalloc page" which reduces fragmentation and increases locality -- @@ -42,7 +44,7 @@ It also has an easy way to override the default allocator in [Windows](#override similar to randomized algorithms like skip lists where adding a random oracle removes the need for a more complex algorithm. - __eager page reset__: when a "page" becomes empty (with increased chance - due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged") + due to free list sharding) the memory is marked to the OS as unused (reset or decommitted) reducing (real) memory pressure and fragmentation, especially in long running programs. - __secure__: _mimalloc_ can be built in secure mode, adding guard pages, @@ -52,13 +54,12 @@ It also has an easy way to override the default allocator in [Windows](#override - __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions. A heap can be destroyed at once instead of deallocating each object separately. - __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation - times (_wcat_), bounded space overhead (~0.2% meta-data, with low internal fragmentation), - and has no internal points of contention using only atomic operations. + times (_wcat_) (upto OS primitives), bounded space overhead (~0.2% meta-data, with low + internal fragmentation), and has no internal points of contention using only atomic operations. 
- __fast__: In our benchmarks (see [below](#performance)), _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), - and often uses less memory. A nice property - is that it does consistently well over a wide range of benchmarks. There is also good huge OS page - support for larger server programs. + and often uses less memory. A nice property is that it does consistently well over a wide range + of benchmarks. There is also good huge OS page support for larger server programs. The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API. You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results. @@ -77,6 +78,15 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. +* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with asan and improved [Valgrind] support. Support arbitrary large + alignments (in particular for `std::pmr` pools). + Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). + Heap walks now visit all objects (including huge objects). Support Windows nano server containers (by Johannes Schindelin, @dscho). + Various small bug fixes. + +* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind] for leak testing and heap block overflow detection. Initial + support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . + * 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object @@ -337,6 +347,44 @@ When _mimalloc_ is built using debug mode, various checks are done at runtime to - Double free's, and freeing invalid heap pointers are detected. - Corrupted free-lists and some forms of use-after-free are detected. +## Valgrind + +Generally, we recommend using the standard allocator with the amazing [Valgrind] tool (and +also for other address sanitizers). +However, it is possible to build mimalloc with Valgrind support. This has a small performance +overhead but does allow detecting memory leaks and byte-precise buffer overflows directly on final +executables. To build with valgrind support, use the `MI_VALGRIND=ON` cmake option: + +``` +> cmake ../.. -DMI_VALGRIND=ON +``` + +This can also be combined with secure mode or debug mode. 
+You can then run your programs directly under valgrind: + +``` +> valgrind +``` + +If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly), +you also need to tell `valgrind` to not intercept those calls itself, and use: + +``` +> MIMALLOC_SHOW_STATS=1 valgrind --soname-synonyms=somalloc=*mimalloc* -- +``` + +By setting the `MIMALLOC_SHOW_STATS` environment variable you can check that mimalloc is indeed +used and not the standard allocator. Even though the [Valgrind option][valgrind-soname] +is called `--soname-synonyms`, this also +works when overriding with a static library or object file. Unfortunately, it is not possible to +dynamically override mimalloc using `LD_PRELOAD` together with `valgrind`. +See also the `test/test-wrong.c` file to test with `valgrind`. + +Valgrind support is in its initial development -- please report any issues. + +[Valgrind]: https://valgrind.org/ +[valgrind-soname]: https://valgrind.org/docs/manual/manual-core.html#opt.soname-synonyms + # Overriding Standard Malloc diff --git a/compat/mimalloc/segment-cache.c b/compat/mimalloc/segment-cache.c index c071239ce32bee..7a244c3ff26bd5 100644 --- a/compat/mimalloc/segment-cache.c +++ b/compat/mimalloc/segment-cache.c @@ -39,8 +39,17 @@ static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free +static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { + mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); +} -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline static void* mi_segment_cache_pop_ex( + bool all_suitable, + size_t size, mi_commit_mask_t* commit_mask, + mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, + mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return NULL; @@ -60,12 +69,15 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm // find an available slot mi_bitmap_index_t bitidx = 0; bool claimed = false; + mi_arena_id_t req_arena_id = _req_arena_id; + mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? + if (*large) { // large allowed? 
- claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = true; } if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + claimed = _mi_bitmap_try_find_from_claim_pred (cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = false; } @@ -89,6 +101,12 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm #endif } + +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) +{ + return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large, is_pinned, is_zero, _req_arena_id, memid, tld); +} + static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { if (mi_commit_mask_is_empty(cmask)) { @@ -115,14 +133,14 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo #define MI_MAX_PURGE_PER_PUSH (4) -static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld) +static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) { MI_UNUSED(tld); if (!mi_option_is_enabled(mi_option_allow_decommit)) return; mi_msecs_t now = _mi_clock_now(); size_t purged = 0; - const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); - size_t idx = (force ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); + const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); + size_t idx = (visit_all ? 
0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; @@ -146,13 +164,43 @@ static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld } _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } - if (!force && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push + if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push } } } void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { - mi_segment_cache_purge(force, tld ); + if (force) { + // called on `mi_collect(true)` but not on thread termination + _mi_segment_cache_free_all(tld); + } + else { + mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); + } +} + +void _mi_segment_cache_free_all(mi_os_tld_t* tld) { + mi_commit_mask_t commit_mask; + mi_commit_mask_t decommit_mask; + bool is_pinned; + bool is_zero; + size_t memid; + const size_t size = MI_SEGMENT_SIZE; + // iterate twice: first large pages, then regular memory + for (int i = 0; i < 2; i++) { + void* p; + do { + // keep popping and freeing the memory + bool large = (i == 0); + p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, + &large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); + if (p != NULL) { + size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); + if (csize > 0 && !is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); + _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); + } + } while (p != NULL); + } } mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) @@ -173,7 +221,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me } // purge expired entries - mi_segment_cache_purge(false /* force? */, tld); + mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); // find an available slot mi_bitmap_index_t bitidx; @@ -237,7 +285,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { - mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? + mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? if ((uintptr_t)segment >= MI_MAX_ADDRESS) { *bitidx = 0; return MI_SEGMENT_MAP_WSIZE; @@ -277,13 +325,14 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment) { // Determine the segment belonging to a pointer or NULL if it is not in a valid segment. 
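For context on `_mi_segment_of` just below: its fast path masks the pointer down to the segment alignment and tests one bit per segment in `mi_segment_map`. The following is a simplified sketch of that idea, with an illustrative segment size and without the real bitmap bookkeeping:

```
#include <assert.h>
#include <stdint.h>

#define SEGMENT_SIZE ((uintptr_t)(64 * 1024 * 1024))  // illustrative 64MiB granularity

// Align an interior pointer down to its segment start (the idea behind _mi_ptr_segment).
static uintptr_t ptr_segment(uintptr_t p) {
  return p & ~(SEGMENT_SIZE - 1);
}

int main(void) {
  const uintptr_t segment = 42 * SEGMENT_SIZE;  // pretend a segment starts here
  const uintptr_t inside  = segment + 12345;    // any address within that segment
  assert(ptr_segment(inside) == segment);
  // The real code then checks the corresponding bit in mi_segment_map to decide
  // whether this address belongs to a mimalloc segment before touching it.
  return 0;
}
```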
static mi_segment_t* _mi_segment_of(const void* p) { + if (p == NULL) return NULL; mi_segment_t* segment = _mi_ptr_segment(p); - if (segment == NULL) return NULL; + mi_assert_internal(segment != NULL); size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { + if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { return segment; // yes, allocated by us } if (index==MI_SEGMENT_MAP_WSIZE) return NULL; @@ -324,7 +373,7 @@ static mi_segment_t* _mi_segment_of(const void* p) { mi_assert_internal((void*)segment < p); bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(cookie_ok); - if (mi_unlikely(!cookie_ok)) return NULL; + if mi_unlikely(!cookie_ok) return NULL; if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); return segment; diff --git a/compat/mimalloc/segment.c b/compat/mimalloc/segment.c index d772440d69f032..85158ece49371e 100644 --- a/compat/mimalloc/segment.c +++ b/compat/mimalloc/segment.c @@ -316,7 +316,7 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c ptrdiff_t idx = slice - segment->slices; size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects - size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? MI_MAX_ALIGN_GUARANTEE : 0); + size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 
3*MI_MAX_ALIGN_GUARANTEE : 0); if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } @@ -340,8 +340,10 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, siz if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // and the page data (and one at the end of the segment) - guardsize = page_size; - required = _mi_align_up(required, page_size); + guardsize = page_size; + if (required > 0) { + required = _mi_align_up(required, MI_SEGMENT_SLICE_SIZE) + page_size; + } } if (pre_size != NULL) *pre_size = isize; @@ -386,11 +388,13 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); - if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { + if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache + !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) + { const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); _mi_abandoned_await_readers(); // wait until safe to free - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->os); + _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats); } } @@ -402,11 +406,11 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { /* ----------------------------------------------------------- - Span management + Commit/Decommit ranges ----------------------------------------------------------- */ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) { - mi_assert_internal(_mi_ptr_segment(p) == segment); + mi_assert_internal(_mi_ptr_segment(p + 1) == segment); mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); mi_commit_mask_create_empty(cm); if (size == 0 || size > MI_SEGMENT_SIZE || segment->kind == MI_SEGMENT_HUGE) return; @@ -459,15 +463,6 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - // try to commit in at least MI_MINIMAL_COMMIT_SIZE sizes. 
- /* - if (commit && size > 0) { - const size_t csize = _mi_align_up(size, MI_MINIMAL_COMMIT_SIZE); - if (p + csize <= mi_segment_end(segment)) { - size = csize; - } - } - */ // commit liberal, but decommit conservative uint8_t* start = NULL; size_t full_size = 0; @@ -536,8 +531,12 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ } else if (segment->decommit_expire <= now) { // previous decommit mask already expired - // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) { + mi_segment_delayed_decommit(segment, true, stats); + } + else { + segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + } } else { // previous decommit mask is not yet expired, increase the expiration by a bit. @@ -570,12 +569,16 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st } +/* ----------------------------------------------------------- + Span free +----------------------------------------------------------- */ + static bool mi_segment_is_abandoned(mi_segment_t* segment) { return (segment->thread_id == 0); } // note: can be called on abandoned segments -static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) ? 
NULL : mi_span_queue_for(slice_count,tld)); @@ -595,7 +598,9 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size } // perhaps decommit - mi_segment_perhaps_decommit(segment,mi_slice_start(slice),slice_count*MI_SEGMENT_SLICE_SIZE,tld->stats); + if (allow_decommit) { + mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + } // and push it on the free page queue (if it was not a huge page) if (sq != NULL) mi_span_queue_push( sq, slice ); @@ -657,27 +662,20 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ } // and add the new free page - mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld); + mi_segment_span_free(segment, mi_slice_index(slice), slice_count, true, tld); return slice; } -static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { - mi_assert_internal(_mi_ptr_segment(slice)==segment); - mi_assert_internal(slice->slice_count >= slice_count); - mi_assert_internal(slice->xblock_size > 0); // no more in free queue - if (slice->slice_count <= slice_count) return; - mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - size_t next_index = mi_slice_index(slice) + slice_count; - size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, tld); - slice->slice_count = (uint32_t)slice_count; -} + +/* ----------------------------------------------------------- + Page allocation +----------------------------------------------------------- */ // Note: may still return NULL if committing the memory failed static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); - mi_slice_t* slice = &segment->slices[slice_index]; + mi_slice_t* const slice = &segment->slices[slice_index]; mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1); // commit before changing the slice data @@ -698,18 +696,21 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i size_t extra = slice_count-1; if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET; if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than avaiable entries in the segment->slices - slice++; - for (size_t i = 1; i <= extra; i++, slice++) { - slice->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i); - slice->slice_count = 0; - slice->xblock_size = 1; + + mi_slice_t* slice_next = slice + 1; + for (size_t i = 1; i <= extra; i++, slice_next++) { + slice_next->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i); + slice_next->slice_count = 0; + slice_next->xblock_size = 1; } - // and also for the last one (if not set already) (the last one is needed for coalescing) + // and also for the last one (if not set already) (the last one is needed for coalescing and for large alignments) // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) - mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_index + slice_count - 1]; - if (last < mi_segment_slices_end(segment) && last >= slice) { - last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1)); + mi_slice_t* last = slice + slice_count - 1; + mi_slice_t* end = (mi_slice_t*)mi_segment_slices_end(segment); + if (last > end) last = 
end; + if (last > slice) { + last->slice_offset = (uint32_t)(sizeof(mi_slice_t) * (last - slice)); last->slice_count = 0; last->xblock_size = 1; } @@ -721,7 +722,19 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i return page; } -static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segments_tld_t* tld) { +static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(_mi_ptr_segment(slice) == segment); + mi_assert_internal(slice->slice_count >= slice_count); + mi_assert_internal(slice->xblock_size > 0); // no more in free queue + if (slice->slice_count <= slice_count) return; + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); + size_t next_index = mi_slice_index(slice) + slice_count; + size_t next_count = slice->slice_count - slice_count; + mi_segment_span_free(segment, next_index, next_count, false /* don't decommit left-over part */, tld); + slice->slice_count = (uint32_t)slice_count; +} + +static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX); // search from best fit up mi_span_queue_t* sq = mi_span_queue_for(slice_count, tld); @@ -730,19 +743,23 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm for (mi_slice_t* slice = sq->first; slice != NULL; slice = slice->next) { if (slice->slice_count >= slice_count) { // found one - mi_span_queue_delete(sq, slice); mi_segment_t* segment = _mi_ptr_segment(slice); - if (slice->slice_count > slice_count) { - mi_segment_slice_split(segment, slice, slice_count, tld); - } - mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); - mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); - if (page == NULL) { - // commit failed; return NULL but first restore the slice - mi_segment_span_free_coalesce(slice, tld); - return NULL; + if (_mi_arena_memid_is_suitable(segment->memid, req_arena_id)) { + // found a suitable page span + mi_span_queue_delete(sq, slice); + + if (slice->slice_count > slice_count) { + mi_segment_slice_split(segment, slice, slice_count, tld); + } + mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); + mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); + if (page == NULL) { + // commit failed; return NULL but first restore the slice + mi_segment_span_free_coalesce(slice, tld); + return NULL; + } + return page; } - return page; } } sq++; @@ -756,17 +773,85 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm Segment allocation ----------------------------------------------------------- */ +static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delay, mi_arena_id_t req_arena_id, + size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices, + mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* pdecommit_mask, + bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) + +{ + // Allocate the segment from the OS + bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + bool is_pinned = false; + size_t memid = 0; + size_t align_offset = 0; + size_t alignment = MI_SEGMENT_ALIGN; + + if (page_alignment 
> 0) { + // mi_assert_internal(huge_page != NULL); + mi_assert_internal(page_alignment >= MI_SEGMENT_ALIGN); + alignment = page_alignment; + const size_t info_size = (*pinfo_slices) * MI_SEGMENT_SLICE_SIZE; + align_offset = _mi_align_up( info_size, MI_SEGMENT_ALIGN ); + const size_t extra = align_offset - info_size; + // recalculate due to potential guard pages + *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices); + //segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); + //segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; + } + const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; + mi_segment_t* segment = NULL; + + // get from cache? + if (page_alignment == 0) { + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); + } + + // get from OS + if (segment==NULL) { + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, pcommit, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); + if (segment == NULL) return NULL; // failed to allocate + if (*pcommit) { + mi_commit_mask_create_full(pcommit_mask); + } + else { + mi_commit_mask_create_empty(pcommit_mask); + } + } + mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); + + const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); + mi_assert_internal(commit_needed>0); + mi_commit_mask_t commit_needed_mask; + mi_commit_mask_create(0, commit_needed, &commit_needed_mask); + if (!mi_commit_mask_all_set(pcommit_mask, &commit_needed_mask)) { + // at least commit the info slices + mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= (*pinfo_slices)*MI_SEGMENT_SLICE_SIZE); + bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, is_zero, tld->stats); + if (!ok) return NULL; // failed to commit + mi_commit_mask_set(pcommit_mask, &commit_needed_mask); + } + mi_track_mem_undefined(segment,commit_needed); + segment->memid = memid; + segment->mem_is_pinned = is_pinned; + segment->mem_is_large = mem_large; + segment->mem_is_committed = mi_commit_mask_is_full(pcommit_mask); + segment->mem_alignment = alignment; + segment->mem_align_offset = align_offset; + mi_segments_track_size((long)(segment_size), tld); + _mi_segment_map_allocated_at(segment); + return segment; +} + + // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) +static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL)); - mi_assert_internal((segment==NULL) || (segment!=NULL && required==0)); + // calculate needed sizes first size_t info_slices; size_t pre_size; - const size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices); - const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? 
MI_SLICES_PER_SEGMENT : segment_slices); - const size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; + size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices); // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) const bool eager_delay = (// !_mi_os_has_overcommit() && // never delay on overcommit systems @@ -774,89 +859,43 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (required > 0); - - // Try to get from our cache first bool is_zero = false; - const bool commit_info_still_good = (segment != NULL); + mi_commit_mask_t commit_mask; mi_commit_mask_t decommit_mask; - if (segment != NULL) { - commit_mask = segment->commit_mask; - decommit_mask = segment->decommit_mask; - } - else { - mi_commit_mask_create_empty(&commit_mask); - mi_commit_mask_create_empty(&decommit_mask); - } - if (segment==NULL) { - // Allocate the segment from the OS - bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy - bool is_pinned = false; - size_t memid = 0; - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld); - if (segment==NULL) { - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); - if (segment == NULL) return NULL; // failed to allocate - if (commit) { - mi_commit_mask_create_full(&commit_mask); - } - else { - mi_commit_mask_create_empty(&commit_mask); - } - } - mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); + mi_commit_mask_create_empty(&commit_mask); + mi_commit_mask_create_empty(&decommit_mask); - const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); - mi_assert_internal(commit_needed>0); - mi_commit_mask_t commit_needed_mask; - mi_commit_mask_create(0, commit_needed, &commit_needed_mask); - if (!mi_commit_mask_all_set(&commit_mask, &commit_needed_mask)) { - // at least commit the info slices - mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= info_slices*MI_SEGMENT_SLICE_SIZE); - bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, &is_zero, tld->stats); - if (!ok) return NULL; // failed to commit - mi_commit_mask_set(&commit_mask, &commit_needed_mask); - } - segment->memid = memid; - segment->mem_is_pinned = is_pinned; - segment->mem_is_large = mem_large; - segment->mem_is_committed = mi_commit_mask_is_full(&commit_mask); - mi_segments_track_size((long)(segment_size), tld); - _mi_segment_map_allocated_at(segment); - } + // Allocate the segment from the OS + mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id, + &segment_slices, &pre_size, &info_slices, &commit_mask, &decommit_mask, + &is_zero, &commit, tld, os_tld); + if (segment == NULL) return NULL; - // zero the segment info? -- not always needed as it is zero initialized from the OS + // zero the segment info? 
-- not always needed as it may be zero initialized from the OS mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan if (!is_zero) { ptrdiff_t ofs = offsetof(mi_segment_t, next); size_t prefix = offsetof(mi_segment_t, slices) - ofs; - memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices); + memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*(segment_slices+1)); // one more } - if (!commit_info_still_good) { - segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); - if (segment->allow_decommit) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - segment->decommit_mask = decommit_mask; - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - #if MI_DEBUG>2 - const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); - mi_commit_mask_t commit_needed_mask; - mi_commit_mask_create(0, commit_needed, &commit_needed_mask); - mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); - #endif - } - else { - mi_assert_internal(mi_commit_mask_is_empty(&decommit_mask)); - segment->decommit_expire = 0; - mi_commit_mask_create_empty( &segment->decommit_mask ); - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); - } + segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed + segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); + if (segment->allow_decommit) { + segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay); + segment->decommit_mask = decommit_mask; + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + #if MI_DEBUG>2 + const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); + mi_commit_mask_t commit_needed_mask; + mi_commit_mask_create(0, commit_needed, &commit_needed_mask); + mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); + #endif } - // initialize segment info + const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices); segment->segment_slices = segment_slices; segment->segment_info_slices = info_slices; segment->thread_id = _mi_thread_id(); @@ -891,7 +930,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // initialize initial free pages if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page mi_assert_internal(huge_page==NULL); - mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, tld); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld); } else { mi_assert_internal(huge_page!=NULL); @@ -906,12 +945,6 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } -// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . 
-static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { - return mi_segment_init(NULL, required, tld, os_tld, huge_page); -} - - static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { MI_UNUSED(force); mi_assert_internal(segment != NULL); @@ -1149,8 +1182,8 @@ static mi_segment_t* mi_abandoned_pop(void) { // Check efficiently if it is empty (or if the visited list needs to be moved) mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); segment = mi_tagged_segment_ptr(ts); - if (mi_likely(segment == NULL)) { - if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL + if mi_likely(segment == NULL) { + if mi_likely(!mi_abandoned_visited_revisit()) { // try to swap in the visited list on NULL return NULL; } } @@ -1367,6 +1400,9 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { segment->abandoned_visits++; + // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments + // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way? + bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid); bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees) if (segment->used == 0) { // free the segment (by forced reclaim) to make it available to other threads. @@ -1376,13 +1412,13 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice // freeing but that would violate some invariants temporarily) mi_segment_reclaim(segment, heap, 0, NULL, tld); } - else if (has_page) { + else if (has_page && is_suitable) { // found a large enough free span, or a page of the right block_size with free space // we return the result of reclaim (which is usually `segment`) as it might free // the segment due to concurrent frees (in which case `NULL` is returned). return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); } - else if (segment->abandoned_visits > 3) { + else if (segment->abandoned_visits > 3 && is_suitable) { // always reclaim on 3rd visit to limit the abandoned queue length. mi_segment_reclaim(segment, heap, 0, NULL, tld); } @@ -1442,7 +1478,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_ return segment; } // 2. otherwise allocate a fresh segment - return mi_segment_alloc(0, tld, os_tld, NULL); + return mi_segment_alloc(0, 0, heap->arena_id, tld, os_tld, NULL); } @@ -1458,7 +1494,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size); - mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); + mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, heap->arena_id, tld); //(required <= MI_SMALL_SIZE_MAX ? 
0 : slices_needed), tld); if (page==NULL) { // no free page, allocate a new segment and try again if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) { @@ -1482,17 +1518,37 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki Huge page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page = NULL; - mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld,&page); + mi_segment_t* segment = mi_segment_alloc(size,page_alignment,req_arena_id,tld,os_tld,&page); if (segment == NULL || page==NULL) return NULL; mi_assert_internal(segment->used==1); mi_assert_internal(mi_page_block_size(page) >= size); + #if MI_HUGE_PAGE_ABANDON segment->thread_id = 0; // huge segments are immediately abandoned + #endif + + // for huge pages we initialize the xblock_size as we may + // overallocate to accommodate large alignments. + size_t psize; + uint8_t* start = _mi_segment_page_start(segment, page, &psize); + page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : (uint32_t)psize); + + // decommit the part of the prefix of a page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE) + if (page_alignment > 0 && segment->allow_decommit) { + uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment); + mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); + mi_assert_internal(psize - (aligned_p - start) >= size); + uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list + ptrdiff_t decommit_size = aligned_p - decommit_start; + _mi_os_decommit(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + } + return page; } +#if MI_HUGE_PAGE_ABANDON // free huge block from another thread void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { // huge page segments are always abandoned and can be freed immediately by any thread @@ -1520,12 +1576,34 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block #endif } +#else +// reset memory of a huge block from another thread +void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { + MI_UNUSED(page); + mi_assert_internal(segment->kind == MI_SEGMENT_HUGE); + mi_assert_internal(segment == _mi_page_segment(page)); + mi_assert_internal(page->used == 1); // this is called just before the free + mi_assert_internal(page->free == NULL); + if (segment->allow_decommit) { + const size_t csize = mi_usable_size(block) - sizeof(mi_block_t); + uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); + _mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + } +} +#endif + /* ----------------------------------------------------------- Page allocation and free ----------------------------------------------------------- */ -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { + if mi_unlikely(page_alignment > 
MI_ALIGNMENT_MAX) { + mi_assert_internal(_mi_is_power_of_two(page_alignment)); + mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE); + if (page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; } + page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld,os_tld); + } + else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segments_page_alloc(heap,MI_PAGE_SMALL,block_size,block_size,tld,os_tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { @@ -1535,8 +1613,9 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segment page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld); } else { - page = mi_segment_huge_page_alloc(block_size,tld,os_tld); + page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld,os_tld); } + mi_assert_internal(page == NULL || _mi_heap_memid_is_suitable(heap, _mi_page_segment(page)->memid)); mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); return page; } diff --git a/compat/mimalloc/stats.c b/compat/mimalloc/stats.c index c722189f7044e7..c09d816c4eee0a 100644 --- a/compat/mimalloc/stats.c +++ b/compat/mimalloc/stats.c @@ -170,19 +170,23 @@ static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* ar else mi_print_amount(n,0,out,arg); } -static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg ) { +static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg, const char* notok ) { _mi_fprintf(out, arg,"%10s:", msg); - if (unit>0) { + if (unit > 0) { mi_print_amount(stat->peak, unit, out, arg); mi_print_amount(stat->allocated, unit, out, arg); mi_print_amount(stat->freed, unit, out, arg); mi_print_amount(stat->current, unit, out, arg); mi_print_amount(unit, 1, out, arg); mi_print_count(stat->allocated, unit, out, arg); - if (stat->allocated > stat->freed) - _mi_fprintf(out, arg, " not all freed!\n"); - else + if (stat->allocated > stat->freed) { + _mi_fprintf(out, arg, " "); + _mi_fprintf(out, arg, (notok == NULL ? "not all freed!" 
: notok)); + _mi_fprintf(out, arg, "\n"); + } + else { _mi_fprintf(out, arg, " ok\n"); + } } else if (unit<0) { mi_print_amount(stat->peak, -1, out, arg); @@ -210,6 +214,10 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t } } +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + mi_stat_print_ex(stat, msg, unit, out, arg, NULL); +} + static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->total, -1, out, arg); @@ -267,7 +275,7 @@ static void mi_buffered_flush(buffered_t* buf) { buf->used = 0; } -static void mi_buffered_out(const char* msg, void* arg) { +static void mi_cdecl mi_buffered_out(const char* msg, void* arg) { buffered_t* buf = (buffered_t*)arg; if (msg==NULL || buf==NULL) return; for (const char* src = msg; *src != 0; src++) { @@ -312,8 +320,8 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_print(&stats->malloc, "malloc req", 1, out, arg); _mi_fprintf(out, arg, "\n"); #endif - mi_stat_print(&stats->reserved, "reserved", 1, out, arg); - mi_stat_print(&stats->committed, "committed", 1, out, arg); + mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); + mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, ""); mi_stat_print(&stats->reset, "reset", 1, out, arg); mi_stat_print(&stats->page_committed, "touched", 1, out, arg); mi_stat_print(&stats->segments, "segments", -1, out, arg); @@ -457,9 +465,6 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { #if defined(_WIN32) #include -#include -#pragma comment(lib,"psapi.lib") -#include "compat/win32/lazyload.h" static mi_msecs_t filetime_msecs(const FILETIME* ftime) { ULARGE_INTEGER i; @@ -469,6 +474,22 @@ static mi_msecs_t filetime_msecs(const FILETIME* ftime) { return msecs; } +typedef struct _PROCESS_MEMORY_COUNTERS { + DWORD cb; + DWORD PageFaultCount; + SIZE_T PeakWorkingSetSize; + SIZE_T WorkingSetSize; + SIZE_T QuotaPeakPagedPoolUsage; + SIZE_T QuotaPagedPoolUsage; + SIZE_T QuotaPeakNonPagedPoolUsage; + SIZE_T QuotaNonPagedPoolUsage; + SIZE_T PagefileUsage; + SIZE_T PeakPagefileUsage; +} PROCESS_MEMORY_COUNTERS; +typedef PROCESS_MEMORY_COUNTERS* PPROCESS_MEMORY_COUNTERS; +typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); +static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; + static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) { *elapsed = _mi_clock_end(mi_process_start); @@ -479,18 +500,26 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); *utime = filetime_msecs(&ut); *stime = filetime_msecs(&st); + + // load psapi on demand + if (pGetProcessMemoryInfo == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); + if (hDll != NULL) { + pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); + } + } + + // get process info PROCESS_MEMORY_COUNTERS info; - DECLARE_PROC_ADDR(psapi, BOOL, WINAPI, GetProcessMemoryInfo, HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); - if (INIT_PROC_ADDR(GetProcessMemoryInfo)) { - GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = 
(size_t)info.PeakWorkingSetSize;
- *current_commit = (size_t)info.PagefileUsage;
- *peak_commit = (size_t)info.PeakPagefileUsage;
- *page_faults = (size_t)info.PageFaultCount;
- } else {
- *current_rss = *peak_rss = *current_commit = *peak_commit = *page_faults = 0;
+ memset(&info, 0, sizeof(info));
+ if (pGetProcessMemoryInfo != NULL) {
+ pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
}
*current_rss = (size_t)info.WorkingSetSize;
*peak_rss = (size_t)info.PeakWorkingSetSize;
*current_commit = (size_t)info.PagefileUsage;
*peak_commit = (size_t)info.PeakPagefileUsage;
*page_faults = (size_t)info.PageFaultCount;
}

#elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__))
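
Aside: the huge-page path in the segment.c hunk above over-allocates when page_alignment exceeds MI_ALIGNMENT_MAX, rounds the block start up to the requested alignment, and then decommits the unused prefix (minus a small header kept committed for the free list). The standalone C sketch below only illustrates that arithmetic; ALIGN_UP, the example addresses, and the 16-byte header size are assumptions made for the example, not mimalloc definitions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* round x up to a multiple of a; 'a' must be a power of two (illustrative macro, not mimalloc's) */
#define ALIGN_UP(x, a)  (((x) + (uintptr_t)(a) - 1) & ~((uintptr_t)(a) - 1))

int main(void) {
  const uintptr_t start          = 0x40000400;    /* hypothetical start of the page's usable area */
  const uintptr_t page_alignment = 1u << 22;      /* hypothetical 4 MiB alignment request */
  const size_t    header         = 16;            /* bytes kept committed for the free-list block header (assumed) */

  const uintptr_t aligned_p     = ALIGN_UP(start, page_alignment);
  const size_t    prefix        = (size_t)(aligned_p - start);              /* unused bytes before the aligned block */
  const size_t    decommit_size = (prefix > header ? prefix - header : 0);  /* what could be returned to the OS */

  assert((aligned_p % page_alignment) == 0);
  printf("prefix: %zu bytes, decommittable: %zu bytes\n", prefix, decommit_size);
  return 0;
}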
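
Aside: the stats.c hunk above drops the dependency on compat/win32/lazyload.h and resolves GetProcessMemoryInfo by hand, so psapi.dll is only loaded the first time process statistics are requested. The standalone sketch below shows that pattern, assuming a Windows toolchain; PMC_SKETCH is a made-up name that mirrors the PROCESS_MEMORY_COUNTERS layout declared in the patch, while LoadLibrary, GetProcAddress, and GetCurrentProcess are the real Win32 APIs.

#include <windows.h>
#include <stdio.h>
#include <string.h>

typedef struct PMC_SKETCH_ {   /* stand-in for PROCESS_MEMORY_COUNTERS (same layout as in the patch) */
  DWORD  cb;
  DWORD  PageFaultCount;
  SIZE_T PeakWorkingSetSize;
  SIZE_T WorkingSetSize;
  SIZE_T QuotaPeakPagedPoolUsage;
  SIZE_T QuotaPagedPoolUsage;
  SIZE_T QuotaPeakNonPagedPoolUsage;
  SIZE_T QuotaNonPagedPoolUsage;
  SIZE_T PagefileUsage;
  SIZE_T PeakPagefileUsage;
} PMC_SKETCH;

typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PMC_SKETCH*, DWORD);
static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL;

int main(void) {
  /* bind on first use so psapi.dll is never loaded unless stats are actually printed */
  if (pGetProcessMemoryInfo == NULL) {
    HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll"));
    if (hDll != NULL) {
      pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo");
    }
  }

  PMC_SKETCH info;
  memset(&info, 0, sizeof(info));   /* zeros are reported if psapi is unavailable */
  info.cb = sizeof(info);
  if (pGetProcessMemoryInfo != NULL) {
    pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
  }
  printf("working set: %zu bytes (peak %zu)\n",
         (size_t)info.WorkingSetSize, (size_t)info.PeakWorkingSetSize);
  return 0;
}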