
Commit 9ac8b40

feat(heap): slabs and other alloc improvements
1 parent adc8de7 commit 9ac8b40

4 files changed: 174 additions & 57 deletions

core/heap.c

Lines changed: 137 additions & 55 deletions
@@ -35,6 +35,12 @@
 #include "eval.h"
 #include "error.h"
 
+// Slab cache helpers
+#define SLAB_ORDER_MIN MIN_BLOCK_ORDER
+#define SLAB_ORDER_MAX (MIN_BLOCK_ORDER + SLAB_ORDERS - 1)
+#define IS_SLAB_ORDER(o) ((o) >= SLAB_ORDER_MIN && (o) <= SLAB_ORDER_MAX)
+#define SLAB_INDEX(o) ((o) - SLAB_ORDER_MIN)
+
 #ifndef __EMSCRIPTEN__
 RAY_ASSERT(sizeof(struct block_t) == (2 * sizeof(struct obj_t)), "block_t must be 2x obj_t");
 #endif
@@ -63,6 +69,7 @@ heap_p heap_create(i64_t id) {
   heap->foreign_blocks = NULL;
 
   memset(heap->freelist, 0, sizeof(heap->freelist));
+  memset(heap->slabs, 0, sizeof(heap->slabs));
 
   // Initialize swap path from environment or use default
   if (os_get_var("HEAP_SWAP", heap->swap_path, sizeof(heap->swap_path)) == -1) {
@@ -86,40 +93,8 @@ heap_p heap_create(i64_t id) {
   return heap;
 }
 
-nil_t heap_destroy(heap_p heap) {
-  i64_t i;
-  block_p block, next;
-
-  if (heap == NULL)
-    return;
-
-  LOG_INFO("Destroying heap");
-
-  // Ensure foreign blocks are freed
-  if (heap->foreign_blocks != NULL)
-    LOG_WARN("Heap[%lld]: foreign blocks not freed", heap->id);
-
-  // All remaining nodes are pools, so just munmap them
-  for (i = MIN_BLOCK_ORDER; i <= MAX_POOL_ORDER; i++) {
-    block = heap->freelist[i];
-
-    while (block) {
-      next = block->next;
-      if (i != block->pool_order) {
-        LOG_ERROR("Heap[%lld]: leak order: %lld block: %p", heap->id, i, block);
-        return;
-      }
-
-      mmap_free(block, BSIZEOF(i));
-      block = next;
-    }
-  }
-
-  // munmap heap
-  mmap_free(heap, sizeof(struct heap_t));
-
-  LOG_DEBUG("Heap destroyed successfully");
-}
+// Defined after the #ifdef blocks so it can use heap_flush_slabs
+nil_t heap_destroy(heap_p heap);
 
 heap_p heap_get(nil_t) {
   LOG_TRACE("Getting heap instance");
@@ -128,6 +103,8 @@ heap_p heap_get(nil_t) {
 
 #ifdef SYS_MALLOC
 
+static nil_t heap_flush_slabs(heap_p heap) { UNUSED(heap); } // No-op for system malloc
+
 raw_p heap_alloc(i64_t size) { return malloc(size); }
 raw_p heap_mmap(i64_t size) { return mmap_alloc(size); }
 raw_p heap_stack(i64_t size) { return mmap_stack(size); }
@@ -245,6 +222,20 @@ inline __attribute__((always_inline)) nil_t heap_split_block(heap_p heap, block_
   }
 }
 
+// Flush slab caches back to freelists for coalescing
+static nil_t heap_flush_slabs(heap_p heap) {
+  i64_t i;
+  block_p block;
+
+  for (i = 0; i < SLAB_ORDERS; i++) {
+    while (heap->slabs[i].count > 0) {
+      block = heap->slabs[i].stack[--heap->slabs[i].count];
+      // heap_insert_block will set used=0
+      heap_insert_block(heap, block, SLAB_ORDER_MIN + i);
+    }
+  }
+}
+
 raw_p heap_mmap(i64_t size) {
   raw_p ptr = mmap_alloc(size);
 
@@ -273,26 +264,39 @@ raw_p __attribute__((hot)) heap_alloc(i64_t size) {
   block_p block;
   heap_p heap = VM->heap; // Cache heap pointer to avoid repeated VM calls
 
-  if (size == 0 || size > BSIZEOF(MAX_POOL_ORDER))
+  if (UNLIKELY(size == 0 || size > BSIZEOF(MAX_POOL_ORDER)))
     return NULL;
 
   block_size = BLOCKSIZE(size);
 
   // calculate minimal order for this size
   order = ORDEROF(block_size);
 
+  // Fast path: check slab cache for small allocations
+  if (LIKELY(IS_SLAB_ORDER(order))) {
+    i64_t idx = SLAB_INDEX(order);
+    if (LIKELY(heap->slabs[idx].count > 0)) {
+      block = heap->slabs[idx].stack[--heap->slabs[idx].count];
+      // Note: block->order, pool_order, and backed should already be valid
+      // from when the block was first allocated. Just update used and heap_id.
+      block->used = 1;
+      block->heap_id = heap->id;
+      return BLOCK2RAW(block);
+    }
+  }
+
   // find least order block that fits
   i = (AVAIL_MASK << order) & heap->avail;
 
   // no free block found for this size, so mmap it directly if it is bigger than pool size or
   // add a new pool and split as well
-  if (i == 0) {
+  if (UNLIKELY(i == 0)) {
    if (order >= MAX_BLOCK_ORDER) {
      LOG_TRACE("Adding pool of size %lld requested size %lld", BSIZEOF(order), size);
      size = BSIZEOF(order);
      block = heap_add_pool(size);
 
-      if (block == NULL)
+      if (UNLIKELY(block == NULL))
        return NULL;
 
      block->order = order;
@@ -306,7 +310,7 @@ raw_p __attribute__((hot)) heap_alloc(i64_t size) {
 
    block = heap_add_pool(BSIZEOF(MAX_BLOCK_ORDER));
 
-    if (block == NULL)
+    if (UNLIKELY(block == NULL))
      return NULL;
 
    i = MAX_BLOCK_ORDER;
@@ -340,21 +344,26 @@ __attribute__((hot)) nil_t heap_free(raw_p ptr) {
   c8_t filename[64];
   heap_p heap = VM->heap; // Cache heap pointer
 
-  if (ptr == NULL || ptr == NULL_OBJ)
+  if (UNLIKELY(ptr == NULL || ptr == NULL_OBJ))
     return;
 
   block = RAW2BLOCK(ptr);
   order = block->order;
 
   // Validate block metadata - detect memory corruption or invalid pointers
-  // backed should only be 0 or 1, order should be >= MIN_BLOCK_ORDER for heap blocks
-  if (block->backed != B8_FALSE && block->backed != B8_TRUE) {
+  // backed should only be 0 or 1, order should be in valid range
+  if (UNLIKELY(block->backed != B8_FALSE && block->backed != B8_TRUE)) {
     obj_p obj = (obj_p)ptr;
     PANIC("block: b=%d o=%d p=%p t=%d", block->backed, block->order, ptr, obj->type);
   }
 
+  // Validate order is in valid range (detect corruption or external objects)
+  // External/mmap'd objects shouldn't be freed via heap_free
+  if (UNLIKELY(order < MIN_BLOCK_ORDER || order > MAX_POOL_ORDER))
+    return;
+
   // Return block to the system and close file if it is file-backed
-  if (block->backed) {
+  if (UNLIKELY(block->backed)) {
     fd = (i64_t)block->pool;
     heap_remove_pool(block, BSIZEOF(order));
     // Get filename before closing - ignore errors as file may already be gone
@@ -366,7 +375,17 @@ __attribute__((hot)) nil_t heap_free(raw_p ptr) {
     return;
   }
 
-  if (heap->id != 0 && block->heap_id != heap->id) {
+  // Fast path: push to slab cache for small blocks (same heap only)
+  if (heap != NULL && order >= MIN_BLOCK_ORDER && IS_SLAB_ORDER(order) &&
+      (heap->id == 0 || block->heap_id == heap->id)) {
+    i64_t idx = SLAB_INDEX(order);
+    if (heap->slabs[idx].count < SLAB_CACHE_SIZE) {
+      heap->slabs[idx].stack[heap->slabs[idx].count++] = block;
+      return;
+    }
+  }
+
+  if (UNLIKELY(heap->id != 0 && block->heap_id != heap->id)) {
     block->next = heap->foreign_blocks;
     heap->foreign_blocks = block;
     return;
@@ -377,8 +396,9 @@ __attribute__((hot)) nil_t heap_free(raw_p ptr) {
   if (block->pool_order == order)
     return heap_insert_block(heap, block, order);
 
-  // calculate buddy
+  // calculate buddy and prefetch its metadata
   buddy = BUDDYOF(block, order);
+  __builtin_prefetch(buddy, 0, 1); // read, low temporal locality
 
   // buddy is used, or buddy is of different order, so we can't merge
   if (buddy->used || buddy->order != order)
@@ -443,6 +463,9 @@ i64_t heap_gc(nil_t) {
   block_p block, next;
   heap_p h = VM->heap; // Cache heap pointer
 
+  // Flush slab caches to allow coalescing
+  heap_flush_slabs(h);
+
   for (i = MAX_BLOCK_ORDER; i <= MAX_POOL_ORDER; i++) {
     block = h->freelist[i];
     size = BSIZEOF(i);
@@ -464,12 +487,20 @@ i64_t heap_gc(nil_t) {
 }
 
 nil_t heap_borrow(heap_p heap) {
-  i64_t i;
+  i64_t i, j, half;
   heap_p h = VM->heap; // Cache heap pointer (source heap)
 
+  // Transfer half of slab cache entries to worker (improves small object alloc)
+  for (i = 0; i < SLAB_ORDERS; i++) {
+    half = h->slabs[i].count / 2;
+    for (j = 0; j < half; j++) {
+      heap->slabs[i].stack[heap->slabs[i].count++] = h->slabs[i].stack[--h->slabs[i].count];
+    }
+  }
+
+  // Borrow large pool blocks (>=32MB) for big allocations
   for (i = MAX_BLOCK_ORDER; i <= MAX_POOL_ORDER; i++) {
-    // Only borrow if the source heap has a freelist[i] and it has more than one node and it is the pool (not a
-    // split block)
+    // Only borrow if source has freelist[i] with >1 node and it's a full pool
     if (h->freelist[i] == NULL || h->freelist[i]->next == NULL || h->freelist[i]->pool_order != i)
       continue;
 
@@ -485,20 +516,33 @@ nil_t heap_borrow(heap_p heap) {
 
 nil_t heap_merge(heap_p heap) {
   i64_t i;
-  block_p block, last;
+  block_p block, next, last;
   heap_p h = VM->heap; // Cache heap pointer (destination heap)
 
-  // First traverse foreign blocks and free them
+  // Transfer slab caches back to main heap (if room), else flush to freelist
+  for (i = 0; i < SLAB_ORDERS; i++) {
+    // Transfer as many as fit into main's slab cache
+    while (heap->slabs[i].count > 0 && h->slabs[i].count < SLAB_CACHE_SIZE) {
+      h->slabs[i].stack[h->slabs[i].count++] = heap->slabs[i].stack[--heap->slabs[i].count];
+    }
+    // Flush remaining to main heap's freelist
+    while (heap->slabs[i].count > 0) {
+      block = heap->slabs[i].stack[--heap->slabs[i].count];
+      heap_insert_block(h, block, SLAB_ORDER_MIN + i);
+    }
+  }
+
+  // Return foreign blocks via normal free path (includes coalescing)
   block = heap->foreign_blocks;
   while (block != NULL) {
-    last = block;
-    block = block->next;
-    last->heap_id = h->id;
-    heap_free(BLOCK2RAW(last));
+    next = block->next;
+    block->heap_id = h->id;
+    heap_free(BLOCK2RAW(block));
+    block = next;
   }
-
   heap->foreign_blocks = NULL;
 
+  // Merge freelists: find tail, link to main head
   for (i = MIN_BLOCK_ORDER; i <= MAX_POOL_ORDER; i++) {
     block = heap->freelist[i];
     last = NULL;
@@ -515,7 +559,6 @@ nil_t heap_merge(heap_p heap) {
     h->freelist[i]->prev = last;
 
     h->freelist[i] = heap->freelist[i];
-
     heap->freelist[i] = NULL;
   }
 }
@@ -560,3 +603,42 @@ nil_t heap_print_blocks(heap_p heap) {
 }
 
 #endif
+
+// heap_destroy is defined after the #ifdef blocks so it can use heap_flush_slabs
+nil_t heap_destroy(heap_p heap) {
+  i64_t i;
+  block_p block, next;
+
+  if (heap == NULL)
+    return;
+
+  LOG_INFO("Destroying heap");
+
+  // Flush slab caches first
+  heap_flush_slabs(heap);
+
+  // Ensure foreign blocks are freed
+  if (heap->foreign_blocks != NULL)
+    LOG_WARN("Heap[%lld]: foreign blocks not freed", heap->id);
+
+  // All remaining nodes are pools, so just munmap them
+  for (i = MIN_BLOCK_ORDER; i <= MAX_POOL_ORDER; i++) {
+    block = heap->freelist[i];
+
+    while (block) {
+      next = block->next;
+      if (i != block->pool_order) {
+        LOG_ERROR("Heap[%lld]: leak order: %lld block: %p", heap->id, i, block);
+        return;
+      }
+
+      mmap_free(block, BSIZEOF(i));
+      block = next;
+    }
+  }
+
+  // munmap heap
+  mmap_free(heap, sizeof(struct heap_t));
+
+  LOG_DEBUG("Heap destroyed successfully");
+}
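
The net effect of the heap.c changes: a bounded per-order LIFO cache now sits in front of the buddy freelists. heap_free pushes small blocks onto the cache instead of coalescing them, heap_alloc pops them without touching the avail mask, and heap_flush_slabs drains everything back to the freelists whenever coalescing matters (GC, merge, destroy). The standalone sketch below models just that mechanism; SLAB_ORDER_MIN = 5 is an assumed value (the commit defines it as MIN_BLOCK_ORDER, which is not part of this diff), and slab_push/slab_pop are hypothetical helpers, not functions from this codebase.

#include <stdio.h>

#define SLAB_ORDER_MIN  5   // assumption: smallest order, 2^5 = 32 bytes
#define SLAB_ORDERS     4   // orders 5..8 -> sizes 32..256, as in heap.h
#define SLAB_CACHE_SIZE 32  // at most 32 cached blocks per order

typedef struct {
    void *stack[SLAB_CACHE_SIZE]; // LIFO stack of cached blocks
    long  count;                  // current depth
} slab_cache;

static slab_cache slabs[SLAB_ORDERS];

// Free path: O(1) push while the cache has room; a return of 0 tells
// the caller to fall back to the buddy freelist (which coalesces).
static int slab_push(int order, void *block) {
    slab_cache *s = &slabs[order - SLAB_ORDER_MIN];
    if (s->count >= SLAB_CACHE_SIZE)
        return 0;
    s->stack[s->count++] = block;
    return 1;
}

// Alloc path: O(1) pop; NULL signals a cache miss, so the caller takes
// the normal buddy search (avail mask, split, or add a pool).
static void *slab_pop(int order) {
    slab_cache *s = &slabs[order - SLAB_ORDER_MIN];
    return s->count > 0 ? s->stack[--s->count] : NULL;
}

int main(void) {
    int a, b; // stand-ins for two order-6 (64-byte) blocks
    slab_push(6, &a);
    slab_push(6, &b);
    printf("%p %p\n", slab_pop(6), slab_pop(6)); // LIFO: &b first, then &a
    return 0;
}

Because blocks re-enter circulation with their header intact, the real fast path only refreshes used and heap_id, which is what makes it cheap relative to a freelist insert/remove pair.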

core/heap.h

Lines changed: 11 additions & 0 deletions
@@ -31,6 +31,10 @@
 #define MAX_BLOCK_ORDER 25 // 2^25 = 32MB
 #define MAX_POOL_ORDER 38  // 2^38 = 256GB
 
+// Small object cache (slab) for sizes 32, 64, 128, 256 bytes
+#define SLAB_CACHE_SIZE 32
+#define SLAB_ORDERS 4 // orders 5, 6, 7, 8
+
 // Memory modes
 #define MMOD_INTERNAL 0xff
 #define MMOD_EXTERNAL_SIMPLE 0xfd
@@ -56,10 +60,17 @@ typedef struct block_t {
   struct block_t *next;
 } *block_p;
 
+// Small object slab cache for fast alloc/free of common sizes
+typedef struct slab_cache_t {
+  block_p stack[SLAB_CACHE_SIZE]; // LIFO stack of freed blocks
+  i64_t count;                    // current stack depth
+} slab_cache_t;
+
 typedef struct heap_t {
   i64_t id;
   block_p freelist[MAX_POOL_ORDER + 2]; // free list of blocks by order
   i64_t avail;                          // mask of available blocks by order
+  slab_cache_t slabs[SLAB_ORDERS];      // small object caches for orders 5-8
   block_p foreign_blocks;               // foreign blocks (to be freed by the owner)
   block_p backed_blocks;                // backed blocks (to be unmapped)
   memstat_t memstat;
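
A quick footprint check for the new per-heap state, assuming 8-byte pointers and a 64-bit i64_t (both assumptions; neither width appears in this diff): each slab_cache_t is 32 * 8 + 8 = 264 bytes, so the four caches add 4 * 264 = 1056 bytes, roughly 1 KB, to struct heap_t. The hypothetical compile-time check below (C11, not code from the repository) encodes that arithmetic:

#include <assert.h>

// Stand-in for slab_cache_t under the stated assumptions
typedef struct {
    void     *stack[32]; // SLAB_CACHE_SIZE pointers
    long long count;     // i64_t stand-in
} slab_cache_sketch;

static_assert(sizeof(slab_cache_sketch) == 32 * 8 + 8, "264 bytes per order");
static_assert(4 * sizeof(slab_cache_sketch) == 1056, "about 1 KB per heap");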

core/pool.c

Lines changed: 2 additions & 2 deletions
@@ -311,7 +311,7 @@ nil_t pool_prepare(pool_p pool) {
   pool->done_count = 0;
 
   n = pool->executors_count;
-  for (i = 0; i < n; i++) {
+  for (i = 1; i < n; i++) { // Skip executor[0] (main thread) - no self-borrow
     heap_borrow(pool->executors[i].heap);
   }
 
@@ -425,7 +425,7 @@ obj_p pool_run(pool_p pool) {
 
   // merge heaps
   n = pool->executors_count;
-  for (i = 0; i < n; i++) {
+  for (i = 1; i < n; i++) { // Skip executor[0] (main thread) - no self-merge
     heap_merge(pool->executors[i].heap);
   }
 
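The pool.c change is small but deliberate: executor[0] runs on the main thread, and heap_borrow/heap_merge use VM->heap as their counterpart, so starting the loops at i = 0 made the main heap borrow from and merge into itself. With the new slab-transfer loops, a self-transfer would also decrement and increment the same count field inside one expression, so skipping index 0 avoids both wasted and unsafe work. The toy driver below (hypothetical stand-in types, not the repository's structs) shows the lifecycle the two loops implement:

#include <stdio.h>

typedef struct { int heap_id; } executor; // stand-in for the real executor

static void borrow_stub(int id) { printf("prepare: borrow into worker heap %d\n", id); }
static void merge_stub(int id)  { printf("run:     merge from worker heap %d\n", id); }

int main(void) {
    executor ex[4] = {{0}, {1}, {2}, {3}}; // ex[0] is the main thread's heap
    int i, n = 4;

    for (i = 1; i < n; i++) borrow_stub(ex[i].heap_id); // pool_prepare: seed workers
    // ... executors run tasks, allocating from their own heaps ...
    for (i = 1; i < n; i++) merge_stub(ex[i].heap_id);  // pool_run: reclaim results
    return 0;
}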