18 KiB
Memory management overview
The kernel's memory subsystem is implemented in memory.c and exposes three layers:
- Physical Memory Manager (PMM) – a bitmap-based page-frame allocator over a fixed-size pool obtained from the loader at boot.
- Paging helpers – routines to walk and extend the live 4-level x86-64 page tables, map/unmap virtual addresses, and translate virtual to physical.
- Heap allocator – a first-fit free-list allocator with block splitting and bidirectional coalescing, backed by pages from the PMM.
All three layers are wired together and brought up by memory_init:
void memory_init(BootInfo *Boot)
{
SAFE_PRINT(Boot, L"Initializing memory management...\n\r");
pmm_init(Boot);
paging_init(Boot);
heap_init(Boot);
SAFE_PRINT(Boot, L"Memory management ready.\n\r\n\r");
}
Physical Memory Manager (PMM)
Design
The PMM manages a pool of 4 KiB physical page frames acquired from the loader via BootInfo->alloc_pages. It uses a simple bitmap to track free vs. allocated pages:
static UINT64 pmm_pool_base = 0;
static UINTN pmm_total_pages = 0;
static UINTN pmm_free_count = 0;
static UINT8 pmm_bitmap[PMM_POOL_PAGES / 8];
static BOOLEAN pmm_ready = FALSE;
Each bit in pmm_bitmap corresponds to a single page in the pool:
- 0 – page is free.
- 1 – page is allocated.
Helper functions manipulate these bits:
static void pmm_set_bit(UINTN idx)
{
pmm_bitmap[idx / 8] |= (UINT8)(1U << (idx % 8));
}
static void pmm_clear_bit(UINTN idx)
{
pmm_bitmap[idx / 8] &= (UINT8)~(1U << (idx % 8));
}
static BOOLEAN pmm_test_bit(UINTN idx)
{
return (pmm_bitmap[idx / 8] & (1U << (idx % 8))) != 0;
}
Initialisation
pmm_init obtains the underlying page pool from the loader and prepares the bitmap:
void pmm_init(BootInfo *Boot)
{
KSTATUS Status;
UINT64 pool_addr = 0;
UINTN i;
/* Zero the bitmap – all pages start free */
for (i = 0; i < sizeof(pmm_bitmap); i++) {
pmm_bitmap[i] = 0;
}
if (Boot == NULL || Boot->alloc_pages == NULL) {
SAFE_PRINT(Boot, L"PMM: page allocator unavailable\n\r");
return;
}
Status = Boot->alloc_pages(PMM_POOL_PAGES, &pool_addr);
...
pmm_pool_base = (UINT64)pool_addr;
pmm_total_pages = PMM_POOL_PAGES;
pmm_free_count = PMM_POOL_PAGES;
pmm_ready = TRUE;
SAFE_PRINT(Boot, L" PMM : %d pages (%d KB) at 0x%lx\n\r",
pmm_total_pages,
(pmm_total_pages * PAGE_SIZE) / 1024,
pmm_pool_base);
}
Instead of parsing the firmware's memory map, this OS delegates low-level page allocation to the loader via BootInfo->alloc_pages. The PMM then sub-allocates from this contiguous pool using its own bitmap.
Single-page allocation
pmm_alloc_page scans the bitmap for the first free page, marks it allocated, and returns the physical address:
UINT64 pmm_alloc_page(void)
{
UINTN i;
if (!pmm_ready || pmm_free_count == 0) {
return 0;
}
for (i = 0; i < pmm_total_pages; i++) {
if (!pmm_test_bit(i)) {
pmm_set_bit(i);
pmm_free_count--;
return pmm_pool_base + ((UINT64)i * PAGE_SIZE);
}
}
return 0;
}
The corresponding free operation validates the address and clears the bit:
void pmm_free_page(UINT64 phys_addr)
{
UINTN idx;
if (!pmm_ready) return;
if (phys_addr < pmm_pool_base) return;
idx = (UINTN)((phys_addr - pmm_pool_base) / PAGE_SIZE);
if (idx >= pmm_total_pages) return;
if (!pmm_test_bit(idx)) return; /* already free */
pmm_clear_bit(idx);
pmm_free_count++;
}
Contiguous allocation
For multi-page allocations, pmm_alloc_pages performs a first-fit search for a run of count consecutive free bits:
UINT64 pmm_alloc_pages(UINTN count)
{
UINTN i, j;
BOOLEAN found;
if (!pmm_ready || count == 0 || count > pmm_total_pages
|| pmm_free_count < count) {
return 0;
}
for (i = 0; i + count <= pmm_total_pages; i++) {
found = TRUE;
for (j = 0; j < count; j++) {
if (pmm_test_bit(i + j)) {
found = FALSE;
i += j; /* skip past the used page */
break;
}
}
if (found) {
for (j = 0; j < count; j++) {
pmm_set_bit(i + j);
}
pmm_free_count -= count;
return pmm_pool_base + ((UINT64)i * PAGE_SIZE);
}
}
return 0;
}
pmm_free_pages simply calls pmm_free_page for each page in the range.
Paging helpers
The paging layer operates directly on the current CR3 page table hierarchy and uses the PMM to allocate new page-table pages on demand.
Reading CR3 and locating the PML4
static UINT64 read_cr3(void)
{
UINT64 cr3;
__asm__ __volatile__("mov %%cr3, %0" : "=r"(cr3));
return cr3;
}
static void invlpg(UINT64 addr)
{
__asm__ __volatile__("invlpg (%0)" :: "r"(addr) : "memory");
}
static UINT64 *get_pml4(void)
{
return (UINT64 *)(UINTN)(read_cr3() & PTE_ADDR_MASK);
}
read_cr3returns the physical address of the current PML4.get_pml4masks off flag bits usingPTE_ADDR_MASKand casts the result to a pointer, assuming identity mapping of low physical memory (as set up by the loader).
paging_init logs the initial CR3 value for diagnostic purposes:
void paging_init(BootInfo *Boot)
{
SAFE_PRINT(Boot, L" Page: CR3 = 0x%lx (identity-mapped by loader)\n\r",
read_cr3());
}
Walking page-table levels
paging_walk_level abstracts a single step down the PML4 → PDPT → PD → PT hierarchy:
static UINT64 *paging_walk_level(UINT64 *table, UINTN index, BOOLEAN create)
{
UINT64 *next;
UINTN i;
UINT64 page;
if (table[index] & PTE_PRESENT) {
return (UINT64 *)(UINTN)(table[index] & PTE_ADDR_MASK);
}
if (!create) {
return NULL;
}
page = pmm_alloc_page();
if (page == 0) {
return NULL;
}
/* Zero the freshly-allocated page table */
next = (UINT64 *)(UINTN)page;
for (i = 0; i < PAGE_SIZE / sizeof(UINT64); i++) {
next[i] = 0;
}
table[index] = page | PTE_PRESENT | PTE_WRITABLE;
return next;
}
If create is true and the entry is missing, it:
- Allocates a fresh page with
pmm_alloc_page. - Clears it.
- Installs it as the next-level table with base address + default flags (
PTE_PRESENT | PTE_WRITABLE).
Mapping and unmapping pages
To map a single 4 KiB page, the kernel:
- Decomposes the virtual address into PML4/PDPT/PD/PT indices.
- Walks or creates intermediate tables.
- Installs a PTE with the desired flags.
- Invalidates the TLB entry with
invlpg.
BOOLEAN paging_map_page(UINT64 virt, UINT64 phys, UINT64 flags)
{
UINT64 *pml4, *pdpt, *pd, *pt;
UINTN pml4i, pdpti, pdi, pti;
pml4i = (virt >> 39) & 0x1FF;
pdpti = (virt >> 30) & 0x1FF;
pdi = (virt >> 21) & 0x1FF;
pti = (virt >> 12) & 0x1FF;
pml4 = get_pml4();
pdpt = paging_walk_level(pml4, pml4i, TRUE);
if (pdpt == NULL) return FALSE;
/* 1 GB huge page – cannot carve a 4 KB mapping inside it */
if (pdpt[pdpti] & PTE_HUGE) return FALSE;
pd = paging_walk_level(pdpt, pdpti, TRUE);
if (pd == NULL) return FALSE;
/* 2 MB huge page – cannot carve a 4 KB mapping inside it */
if (pd[pdi] & PTE_HUGE) return FALSE;
pt = paging_walk_level(pd, pdi, TRUE);
if (pt == NULL) return FALSE;
pt[pti] = (phys & PTE_ADDR_MASK) | flags | PTE_PRESENT;
invlpg(virt);
return TRUE;
}
Unmapping follows the same index computation but stops early if an intermediate table or mapping is missing or a huge-page mapping is in place:
void paging_unmap_page(UINT64 virt)
{
UINT64 *pml4, *pdpt, *pd, *pt;
UINTN pml4i, pdpti, pdi, pti;
pml4i = (virt >> 39) & 0x1FF;
pdpti = (virt >> 30) & 0x1FF;
pdi = (virt >> 21) & 0x1FF;
pti = (virt >> 12) & 0x1FF;
pml4 = get_pml4();
pdpt = paging_walk_level(pml4, pml4i, FALSE);
if (pdpt == NULL) return;
if (pdpt[pdpti] & PTE_HUGE) return;
pd = paging_walk_level(pdpt, pdpti, FALSE);
if (pd == NULL) return;
if (pd[pdi] & PTE_HUGE) return;
pt = paging_walk_level(pd, pdi, FALSE);
if (pt == NULL) return;
pt[pti] = 0;
invlpg(virt);
}
Virtual-to-physical translation
paging_get_phys walks the existing hierarchy without allocating anything, and supports 4 KiB, 2 MiB, and 1 GiB mappings:
UINT64 paging_get_phys(UINT64 virt)
{
UINT64 *pml4, *pdpt, *pd, *pt;
UINTN pml4i, pdpti, pdi, pti;
pml4i = (virt >> 39) & 0x1FF;
pdpti = (virt >> 30) & 0x1FF;
pdi = (virt >> 21) & 0x1FF;
pti = (virt >> 12) & 0x1FF;
pml4 = get_pml4();
if (!(pml4[pml4i] & PTE_PRESENT)) return 0;
pdpt = (UINT64 *)(UINTN)(pml4[pml4i] & PTE_ADDR_MASK);
if (!(pdpt[pdpti] & PTE_PRESENT)) return 0;
if (pdpt[pdpti] & PTE_HUGE) {
/* 1 GB page */
return (pdpt[pdpti] & 0x000FFFFFC0000000ULL) | (virt & 0x3FFFFFFFULL);
}
pd = (UINT64 *)(UINTN)(pdpt[pdpti] & PTE_ADDR_MASK);
if (!(pd[pdi] & PTE_PRESENT)) return 0;
if (pd[pdi] & PTE_HUGE) {
/* 2 MB page */
return (pd[pdi] & 0x000FFFFFFFE00000ULL) | (virt & 0x1FFFFFULL);
}
pt = (UINT64 *)(UINTN)(pd[pdi] & PTE_ADDR_MASK);
if (!(pt[pti] & PTE_PRESENT)) return 0;
return (pt[pti] & PTE_ADDR_MASK) | (virt & 0xFFFULL);
}
This function is useful for diagnostics and for checking assumptions about how the firmware identity-mapped memory before entering the kernel.
Heap allocator
The heap allocator builds on top of the PMM to provide kmalloc/kfree semantics. It uses a singly linked list of heap blocks (HeapBlock), each containing metadata and a size field describing the payload.
Initialisation
heap_init obtains an initial contiguous region of heap memory and seeds the free list with a single large free block:
void heap_init(BootInfo *Boot)
{
UINT64 phys;
UINTN heap_size;
phys = pmm_alloc_pages(HEAP_INITIAL_PAGES);
if (phys == 0) {
SAFE_PRINT(Boot, L" Heap: failed to allocate pages\n\r");
return;
}
heap_size = HEAP_INITIAL_PAGES * PAGE_SIZE;
heap_start = (HeapBlock *)(UINTN)phys;
heap_start->magic = HEAP_BLOCK_MAGIC;
heap_start->state = HEAP_BLOCK_FREE;
heap_start->size = heap_size - sizeof(HeapBlock);
heap_start->next = NULL;
heap_start->prev = NULL;
heap_ready = TRUE;
SAFE_PRINT(Boot, L" Heap: %d KB at 0x%lx\n\r",
heap_size / 1024, phys);
}
The allocator assumes that the physical address returned by pmm_alloc_pages is accessible via identity mapping, so it can cast it directly to a HeapBlock *.
Alignment helper
Allocations are rounded up to a fixed alignment (e.g., 16 bytes) using align_up:
static UINTN align_up(UINTN val, UINTN align)
{
return (val + align - 1) & ~(align - 1);
}
Allocation (kmalloc)
kmalloc performs a first-fit search of the free list:
void *kmalloc(UINTN size)
{
HeapBlock *block, *split;
UINTN aligned;
if (!heap_ready || size == 0) {
return NULL;
}
aligned = align_up(size, HEAP_ALIGN);
for (block = heap_start; block != NULL; block = block->next) {
if (block->magic != HEAP_BLOCK_MAGIC) {
return NULL; /* heap corruption */
}
if (block->state != HEAP_BLOCK_FREE || block->size < aligned) {
continue;
}
/* Try to split if there is room for another header + 16 bytes */
if (block->size >= aligned + sizeof(HeapBlock) + HEAP_ALIGN) {
split = (HeapBlock *)((UINT8 *)block + sizeof(HeapBlock) + aligned);
split->magic = HEAP_BLOCK_MAGIC;
split->state = HEAP_BLOCK_FREE;
split->size = block->size - aligned - sizeof(HeapBlock);
split->next = block->next;
split->prev = block;
if (block->next != NULL) {
block->next->prev = split;
}
block->next = split;
block->size = aligned;
}
block->state = HEAP_BLOCK_USED;
return (void *)((UINT8 *)block + sizeof(HeapBlock));
}
return NULL; /* out of heap memory */
}
Notable details:
- Corruption detection – checks
HEAP_BLOCK_MAGICfor each block; any mismatch aborts withNULL. - Splitting – if the free block is large enough, it is split into:
- An allocated block of exactly
alignedbytes. - A new trailing free block (
split) with its own header.
- An allocated block of exactly
- Alignment – the returned pointer is
sizeof(HeapBlock)bytes after the header and aligned according toHEAP_ALIGN.
Freeing (kfree) and coalescing
kfree marks a block as free and then attempts to coalesce with neighboring free blocks to combat fragmentation:
void kfree(void *ptr)
{
HeapBlock *block;
if (ptr == NULL || !heap_ready) {
return;
}
block = (HeapBlock *)((UINT8 *)ptr - sizeof(HeapBlock));
if (block->magic != HEAP_BLOCK_MAGIC || block->state != HEAP_BLOCK_USED) {
return; /* bad pointer or double-free */
}
block->state = HEAP_BLOCK_FREE;
/* Coalesce with next neighbour */
if (block->next != NULL
&& block->next->magic == HEAP_BLOCK_MAGIC
&& block->next->state == HEAP_BLOCK_FREE) {
block->size += sizeof(HeapBlock) + block->next->size;
block->next = block->next->next;
if (block->next != NULL) {
block->next->prev = block;
}
}
/* Coalesce with previous neighbour */
if (block->prev != NULL
&& block->prev->magic == HEAP_BLOCK_MAGIC
&& block->prev->state == HEAP_BLOCK_FREE) {
block->prev->size += sizeof(HeapBlock) + block->size;
block->prev->next = block->next;
if (block->next != NULL) {
block->next->prev = block->prev;
}
}
}
The allocator never returns memory to the PMM; all heap pages remain reserved for heap use for the lifetime of the kernel.
Heap statistics
heap_get_stats walks the free list and aggregates total, used, and free bytes as well as block count:
void heap_get_stats(UINTN *total, UINTN *used, UINTN *free_mem,
UINTN *num_blocks)
{
HeapBlock *b;
*total = 0; *used = 0; *free_mem = 0; *num_blocks = 0;
if (!heap_ready) return;
for (b = heap_start; b != NULL && b->magic == HEAP_BLOCK_MAGIC;
b = b->next) {
(*num_blocks)++;
*total += b->size;
if (b->state == HEAP_BLOCK_USED) {
*used += b->size;
} else {
*free_mem += b->size;
}
}
}
These statistics are surfaced to the user via the mem and memtest commands.
Runtime memory diagnostics (mem and memtest)
The mem command (in commands.c) prints a snapshot of PMM and heap state by calling memory_print_stats:
void memory_print_stats(BootInfo *Boot)
{
UINTN h_total, h_used, h_free, h_blocks;
UINTN p_total, p_free, p_used;
p_total = pmm_get_total_pages();
p_free = pmm_get_free_pages();
p_used = p_total - p_free;
heap_get_stats(&h_total, &h_used, &h_free, &h_blocks);
SAFE_PRINT(Boot, L"\n\r");
SAFE_PRINT(Boot, L"Memory Statistics\n\r");
SAFE_PRINT(Boot, L"================================================\n\r");
...
SAFE_PRINT(Boot, L"Paging:\n\r");
SAFE_PRINT(Boot, L" CR3: 0x%lx\n\r", read_cr3());
SAFE_PRINT(Boot, L" Mode: 4-level (PML4)\n\r");
SAFE_PRINT(Boot, L"\n\r");
}
The memtest command runs a scripted set of tests that exercise heap allocation, heap free/coalescing, and PMM single- and multi-page allocation:
static void cmd_memtest(BootInfo *Boot, CHAR16 *Args)
{
void *ptrs[8];
UINTN sizes[] = { 16, 64, 128, 256, 512, 1024, 2048, 4096 };
UINTN i;
UINT64 page;
UINTN h_total, h_used, h_free, h_blocks;
(void)Args;
SAFE_PRINT(Boot, L"\n\r");
SAFE_PRINT(Boot, L"Memory Test\n\r");
SAFE_PRINT(Boot, L"================================================\n\r");
...
/* --- Heap allocation test --- */
...
/* --- Heap free test --- */
...
/* --- PMM page allocation test --- */
...
/* --- Multi-page allocation test --- */
...
SAFE_PRINT(Boot, L"\n\rAll memory tests completed.\n\r\n\r");
}
These commands provide a convenient way to validate memory subsystem behaviour from the Starling Terminal without needing an external debugger.