Files
Operator-system/docs/memory-and-allocation.md
2026-02-27 21:04:56 +00:00

18 KiB
Raw Blame History

Memory management overview

The kernel's memory subsystem is implemented in memory.c and exposes three layers:

  • Physical Memory Manager (PMM): a bitmap-based page-frame allocator over a fixed-size pool obtained from the loader at boot.
  • Paging helpers: routines to walk and extend the live 4-level x86-64 page tables, map/unmap virtual addresses, and translate virtual addresses to physical ones.
  • Heap allocator: a first-fit free-list allocator with block splitting and bidirectional coalescing, backed by pages from the PMM.

All three layers are wired together and brought up by memory_init:

/*
 * Bring up the memory subsystem.
 *
 * Runs the three layer initialisers in dependency order (PMM, paging
 * helpers, heap) and prints a banner before and after via SAFE_PRINT.
 *
 * Boot: loader-provided boot information, forwarded unchanged to each
 *       initialiser and to SAFE_PRINT.
 */
void memory_init(BootInfo *Boot)
{
    SAFE_PRINT(Boot, L"Initializing memory management...\n\r");
    /* The PMM must come first: heap_init draws its pages from it. */
    pmm_init(Boot);
    paging_init(Boot);
    heap_init(Boot);
    SAFE_PRINT(Boot, L"Memory management ready.\n\r\n\r");
}

Physical Memory Manager (PMM)

Design

The PMM manages a pool of 4 KiB physical page frames acquired from the loader via BootInfo->alloc_pages. It uses a simple bitmap to track free vs. allocated pages:

/* Physical address of the first page in the pool obtained from the loader. */
static UINT64  pmm_pool_base   = 0;
/* Number of PAGE_SIZE pages managed by the PMM. */
static UINTN   pmm_total_pages = 0;
/* Number of managed pages that are currently free. */
static UINTN   pmm_free_count  = 0;
/*
 * One bit per pool page (0 = free, 1 = allocated).  The byte count is
 * rounded up so that a PMM_POOL_PAGES value that is not a multiple of 8
 * still gets storage for its trailing pages instead of indexing past the
 * end of the array.
 */
static UINT8   pmm_bitmap[(PMM_POOL_PAGES + 7) / 8];
/* Set to TRUE by pmm_init once the pool is available; the PMM entry points do nothing while FALSE. */
static BOOLEAN pmm_ready       = FALSE;

Each bit in pmm_bitmap corresponds to a single page in the pool:

  • 0: the page is free.
  • 1: the page is allocated.

Helper functions manipulate these bits:

/*
 * Mark page `idx` as allocated by setting its bit in pmm_bitmap.
 * The caller is responsible for passing an index inside the bitmap.
 */
static void pmm_set_bit(UINTN idx)
{
    UINTN byte_pos = idx >> 3;                 /* which byte holds the bit */
    UINT8 mask     = (UINT8)(1U << (idx & 7)); /* bit position inside it   */

    pmm_bitmap[byte_pos] = (UINT8)(pmm_bitmap[byte_pos] | mask);
}

/*
 * Mark page `idx` as free by clearing its bit in pmm_bitmap.
 * The caller is responsible for passing an index inside the bitmap.
 */
static void pmm_clear_bit(UINTN idx)
{
    UINTN byte_pos = idx >> 3;                 /* which byte holds the bit */
    UINT8 mask     = (UINT8)(1U << (idx & 7)); /* bit position inside it   */

    pmm_bitmap[byte_pos] = (UINT8)(pmm_bitmap[byte_pos] & (UINT8)~mask);
}

/*
 * Report whether page `idx` is marked allocated in pmm_bitmap.
 * Returns non-zero when the bit is set, zero when the page is free.
 */
static BOOLEAN pmm_test_bit(UINTN idx)
{
    UINTN byte_pos = idx >> 3;
    UINTN shift    = idx & 7;

    return ((pmm_bitmap[byte_pos] >> shift) & 1U) != 0;
}

Initialisation

pmm_init obtains the underlying page pool from the loader and prepares the bitmap:

/*
 * Initialise the physical memory manager.
 *
 * Clears the allocation bitmap, asks the loader for PMM_POOL_PAGES pages
 * through Boot->alloc_pages, records the pool base and page counts, and
 * finally sets pmm_ready.  If Boot or Boot->alloc_pages is NULL the
 * function prints a message and returns without setting pmm_ready.
 *
 * Boot: loader-provided boot information (may be NULL).
 */
void pmm_init(BootInfo *Boot)
{
    KSTATUS Status;
    UINT64 pool_addr = 0;
    UINTN i;

    /* Zero the bitmap so that all pages start free */
    for (i = 0; i < sizeof(pmm_bitmap); i++) {
        pmm_bitmap[i] = 0;
    }

    if (Boot == NULL || Boot->alloc_pages == NULL) {
        SAFE_PRINT(Boot, L"PMM: page allocator unavailable\n\r");
        return;
    }

    /* Request the whole pool in one call; the loader writes its base address into pool_addr. */
    Status = Boot->alloc_pages(PMM_POOL_PAGES, &pool_addr);
    /* NOTE(review): the handling of Status is elided in this excerpt; presumably a failure returns before the state below is published. */
    ...

    /* Publish the pool; pmm_ready is set last so the other PMM entry points only run on a fully described pool. */
    pmm_pool_base   = (UINT64)pool_addr;
    pmm_total_pages = PMM_POOL_PAGES;
    pmm_free_count  = PMM_POOL_PAGES;
    pmm_ready       = TRUE;

    /* NOTE(review): the first two arguments are UINTN but are printed with %d; confirm SAFE_PRINT's format handling for that width. */
    SAFE_PRINT(Boot, L"  PMM : %d pages (%d KB) at 0x%lx\n\r",
               pmm_total_pages,
               (pmm_total_pages * PAGE_SIZE) / 1024,
               pmm_pool_base);
}

Instead of parsing the firmware's memory map, this OS delegates low-level page allocation to the loader via BootInfo->alloc_pages. The PMM then sub-allocates from this contiguous pool using its own bitmap.

Single-page allocation

pmm_alloc_page scans the bitmap for the first free page, marks it allocated, and returns the physical address:

UINT64 pmm_alloc_page(void)
{
    UINTN i;

    if (!pmm_ready || pmm_free_count == 0) {
        return 0;
    }

    for (i = 0; i < pmm_total_pages; i++) {
        if (!pmm_test_bit(i)) {
            pmm_set_bit(i);
            pmm_free_count--;
            return pmm_pool_base + ((UINT64)i * PAGE_SIZE);
        }
    }

    return 0;
}

The corresponding free operation validates the address and clears the bit:

/*
 * Return a single page to the PMM pool.
 *
 * The call is silently ignored when the PMM is not ready, when
 * phys_addr lies outside the pool, or when the page is already free.
 *
 * phys_addr: physical address inside the page to release.
 */
void pmm_free_page(UINT64 phys_addr)
{
    UINT64 pool_offset;
    UINTN page_idx;

    if (!pmm_ready || phys_addr < pmm_pool_base) {
        return;
    }

    pool_offset = phys_addr - pmm_pool_base;
    page_idx    = (UINTN)(pool_offset / PAGE_SIZE);

    /* Only a page that is in range and currently allocated changes state. */
    if (page_idx < pmm_total_pages && pmm_test_bit(page_idx)) {
        pmm_clear_bit(page_idx);
        pmm_free_count++;
    }
}

Contiguous allocation

For multi-page allocations, pmm_alloc_pages performs a first-fit search for a run of count consecutive free bits:

/*
 * Allocate `count` physically contiguous pages from the PMM pool.
 *
 * Performs a first-fit search for a window of `count` consecutive free
 * bits and marks the whole window allocated.
 *
 * Returns the physical address of the first page, or 0 when the PMM is
 * not ready, count is 0 or exceeds the pool, or no suitable run exists.
 */
UINT64 pmm_alloc_pages(UINTN count)
{
    UINTN start = 0;
    UINTN off;

    if (!pmm_ready || count == 0) {
        return 0;
    }
    if (count > pmm_total_pages || pmm_free_count < count) {
        return 0;
    }

    while (start + count <= pmm_total_pages) {
        /* Locate the first allocated page inside the candidate window. */
        for (off = 0; off < count; off++) {
            if (pmm_test_bit(start + off)) {
                break;
            }
        }

        if (off == count) {
            /* The whole window is free: claim it. */
            for (off = 0; off < count; off++) {
                pmm_set_bit(start + off);
            }
            pmm_free_count -= count;
            return pmm_pool_base + ((UINT64)start * PAGE_SIZE);
        }

        /* No window starting at or before the used page can fit; resume just after it. */
        start += off + 1;
    }

    return 0;
}

pmm_free_pages simply calls pmm_free_page for each page in the range.


Paging helpers

The paging layer operates directly on the current CR3 page table hierarchy and uses the PMM to allocate new page-table pages on demand.

Reading CR3 and locating the PML4

/*
 * Read the CR3 control register and return its raw 64-bit value.
 */
static UINT64 read_cr3(void)
{
    UINT64 value;

    __asm__ __volatile__("mov %%cr3, %0" : "=r"(value));
    return value;
}

/*
 * Execute the INVLPG instruction for `addr`, invalidating the TLB entry
 * that covers that virtual address.  The "memory" clobber keeps the
 * compiler from reordering memory accesses across the instruction.
 */
static void invlpg(UINT64 addr)
{
    __asm__ __volatile__("invlpg (%0)" :: "r"(addr) : "memory");
}

static UINT64 *get_pml4(void)
{
    return (UINT64 *)(UINTN)(read_cr3() & PTE_ADDR_MASK);
}
  • read_cr3 returns the raw CR3 value, which holds the physical address of the current PML4 together with low-order flag bits.
  • get_pml4 masks off the flag bits using PTE_ADDR_MASK and casts the result to a pointer, assuming identity mapping of low physical memory (as set up by the loader).

paging_init logs the initial CR3 value for diagnostic purposes:

/*
 * Paging bring-up hook: only logs the current CR3 value.
 * No page-table state is modified here.
 *
 * Boot: loader-provided boot information, used for printing.
 */
void paging_init(BootInfo *Boot)
{
    UINT64 cr3_now = read_cr3();

    SAFE_PRINT(Boot, L"  Page: CR3 = 0x%lx (identity-mapped by loader)\n\r",
               cr3_now);
}

Walking page-table levels

paging_walk_level abstracts a single step down the PML4 → PDPT → PD → PT hierarchy:

/*
 * Step one level down the page-table hierarchy.
 *
 * table:  table at the current level.
 * index:  entry within `table` to follow.
 * create: when TRUE, a missing next-level table is allocated from the
 *         PMM, zeroed and installed with PTE_PRESENT | PTE_WRITABLE.
 *
 * Returns the next-level table, or NULL when the entry is not present
 * and either `create` is FALSE or the PMM has no page to give.
 */
static UINT64 *paging_walk_level(UINT64 *table, UINTN index, BOOLEAN create)
{
    UINT64 frame;
    UINT64 *child;
    UINTN slot;

    if ((table[index] & PTE_PRESENT) != 0) {
        /* Already populated: follow the stored address. */
        return (UINT64 *)(UINTN)(table[index] & PTE_ADDR_MASK);
    }

    if (!create) {
        return NULL;
    }

    frame = pmm_alloc_page();
    if (frame == 0) {
        return NULL;
    }

    /* A new table must start with every entry marked not-present. */
    child = (UINT64 *)(UINTN)frame;
    slot  = 0;
    while (slot < PAGE_SIZE / sizeof(UINT64)) {
        child[slot] = 0;
        slot++;
    }

    table[index] = frame | PTE_PRESENT | PTE_WRITABLE;
    return child;
}

If create is true and the entry is missing, it:

  • Allocates a fresh page with pmm_alloc_page.
  • Clears it.
  • Installs it as the next-level table with base address + default flags (PTE_PRESENT | PTE_WRITABLE).

Mapping and unmapping pages

To map a single 4 KiB page, the kernel:

  1. Decomposes the virtual address into PML4/PDPT/PD/PT indices.
  2. Walks or creates intermediate tables.
  3. Installs a PTE with the desired flags.
  4. Invalidates the TLB entry with invlpg.
/*
 * Map one 4 KiB page: virtual address `virt` -> physical address `phys`.
 *
 * Intermediate tables are created on demand.  The final entry is built
 * from the address bits of `phys`, the caller's `flags` and PTE_PRESENT,
 * and the TLB entry for `virt` is invalidated afterwards.
 *
 * Returns TRUE on success; FALSE when an intermediate table cannot be
 * obtained or when the address is already covered by a huge-page entry.
 */
BOOLEAN paging_map_page(UINT64 virt, UINT64 phys, UINT64 flags)
{
    UINTN slot_pml4 = (UINTN)((virt >> 39) & 0x1FF);
    UINTN slot_pdpt = (UINTN)((virt >> 30) & 0x1FF);
    UINTN slot_pd   = (UINTN)((virt >> 21) & 0x1FF);
    UINTN slot_pt   = (UINTN)((virt >> 12) & 0x1FF);
    UINT64 *pdpt_tbl;
    UINT64 *pd_tbl;
    UINT64 *pt_tbl;

    pdpt_tbl = paging_walk_level(get_pml4(), slot_pml4, TRUE);
    if (pdpt_tbl == NULL) {
        return FALSE;
    }

    /* A 1 GB huge page cannot have a 4 KB mapping carved inside it. */
    if ((pdpt_tbl[slot_pdpt] & PTE_HUGE) != 0) {
        return FALSE;
    }

    pd_tbl = paging_walk_level(pdpt_tbl, slot_pdpt, TRUE);
    if (pd_tbl == NULL) {
        return FALSE;
    }

    /* A 2 MB huge page cannot have a 4 KB mapping carved inside it. */
    if ((pd_tbl[slot_pd] & PTE_HUGE) != 0) {
        return FALSE;
    }

    pt_tbl = paging_walk_level(pd_tbl, slot_pd, TRUE);
    if (pt_tbl == NULL) {
        return FALSE;
    }

    pt_tbl[slot_pt] = (phys & PTE_ADDR_MASK) | flags | PTE_PRESENT;
    invlpg(virt);
    return TRUE;
}

Unmapping follows the same index computation but stops early if an intermediate table or mapping is missing or a huge-page mapping is in place:

/*
 * Remove the 4 KiB mapping for virtual address `virt`.
 *
 * Nothing is allocated.  The function returns without touching anything
 * when an intermediate table is missing or when the address is covered
 * by a huge-page entry; otherwise the page-table entry is zeroed and the
 * TLB entry for `virt` is invalidated.
 */
void paging_unmap_page(UINT64 virt)
{
    UINTN slot_pml4 = (UINTN)((virt >> 39) & 0x1FF);
    UINTN slot_pdpt = (UINTN)((virt >> 30) & 0x1FF);
    UINTN slot_pd   = (UINTN)((virt >> 21) & 0x1FF);
    UINTN slot_pt   = (UINTN)((virt >> 12) & 0x1FF);
    UINT64 *pdpt_tbl;
    UINT64 *pd_tbl;
    UINT64 *pt_tbl;

    pdpt_tbl = paging_walk_level(get_pml4(), slot_pml4, FALSE);
    if (pdpt_tbl == NULL) {
        return;
    }
    if ((pdpt_tbl[slot_pdpt] & PTE_HUGE) != 0) {
        return;                     /* covered by a 1 GB page */
    }

    pd_tbl = paging_walk_level(pdpt_tbl, slot_pdpt, FALSE);
    if (pd_tbl == NULL) {
        return;
    }
    if ((pd_tbl[slot_pd] & PTE_HUGE) != 0) {
        return;                     /* covered by a 2 MB page */
    }

    pt_tbl = paging_walk_level(pd_tbl, slot_pd, FALSE);
    if (pt_tbl == NULL) {
        return;
    }

    pt_tbl[slot_pt] = 0;
    invlpg(virt);
}

Virtual-to-physical translation

paging_get_phys walks the existing hierarchy without allocating anything, and supports 4 KiB, 2 MiB, and 1 GiB mappings:

/*
 * Translate virtual address `virt` to a physical address by walking the
 * live page tables.  Nothing is allocated or modified.
 *
 * Handles 1 GB and 2 MB huge-page entries as well as ordinary 4 KB
 * pages.  Returns 0 when any level on the path is not present.
 */
UINT64 paging_get_phys(UINT64 virt)
{
    UINT64 *table;
    UINT64 entry;

    /* PML4 level */
    table = get_pml4();
    entry = table[(virt >> 39) & 0x1FF];
    if ((entry & PTE_PRESENT) == 0) {
        return 0;
    }

    /* PDPT level: may terminate in a 1 GB page */
    table = (UINT64 *)(UINTN)(entry & PTE_ADDR_MASK);
    entry = table[(virt >> 30) & 0x1FF];
    if ((entry & PTE_PRESENT) == 0) {
        return 0;
    }
    if ((entry & PTE_HUGE) != 0) {
        return (entry & 0x000FFFFFC0000000ULL) | (virt & 0x3FFFFFFFULL);
    }

    /* PD level: may terminate in a 2 MB page */
    table = (UINT64 *)(UINTN)(entry & PTE_ADDR_MASK);
    entry = table[(virt >> 21) & 0x1FF];
    if ((entry & PTE_PRESENT) == 0) {
        return 0;
    }
    if ((entry & PTE_HUGE) != 0) {
        return (entry & 0x000FFFFFFFE00000ULL) | (virt & 0x1FFFFFULL);
    }

    /* PT level: ordinary 4 KB page */
    table = (UINT64 *)(UINTN)(entry & PTE_ADDR_MASK);
    entry = table[(virt >> 12) & 0x1FF];
    if ((entry & PTE_PRESENT) == 0) {
        return 0;
    }
    return (entry & PTE_ADDR_MASK) | (virt & 0xFFFULL);
}

This function is useful for diagnostics and for checking assumptions about how the firmware identity-mapped memory before entering the kernel.


Heap allocator

The heap allocator builds on top of the PMM to provide kmalloc/kfree semantics. It uses a doubly linked list of heap blocks (HeapBlock, linked through next and prev), each containing metadata and a size field describing the payload.

Initialisation

heap_init obtains an initial contiguous region of heap memory and seeds the free list with a single large free block:

/*
 * Initialise the kernel heap.
 *
 * Takes HEAP_INITIAL_PAGES contiguous pages from the PMM and turns them
 * into a single free block whose payload spans the whole region minus
 * one header.  On allocation failure a message is printed and the heap
 * is left not ready.
 *
 * Boot: loader-provided boot information, used for printing.
 */
void heap_init(BootInfo *Boot)
{
    UINTN      region_bytes = HEAP_INITIAL_PAGES * PAGE_SIZE;
    UINT64     region_phys;
    HeapBlock *first;

    region_phys = pmm_alloc_pages(HEAP_INITIAL_PAGES);
    if (region_phys == 0) {
        SAFE_PRINT(Boot, L"  Heap: failed to allocate pages\n\r");
        return;
    }

    /* The physical address is used directly as a pointer. */
    first = (HeapBlock *)(UINTN)region_phys;
    first->magic = HEAP_BLOCK_MAGIC;
    first->state = HEAP_BLOCK_FREE;
    first->size  = region_bytes - sizeof(HeapBlock);
    first->next  = NULL;
    first->prev  = NULL;

    heap_start = first;
    heap_ready = TRUE;

    SAFE_PRINT(Boot, L"  Heap: %d KB at 0x%lx\n\r",
               region_bytes / 1024, region_phys);
}

The allocator assumes that the physical address returned by pmm_alloc_pages is accessible via identity mapping, so it can cast it directly to a HeapBlock *.

Alignment helper

Allocations are rounded up to a fixed alignment (e.g., 16 bytes) using align_up:

/*
 * Round `val` up to the next multiple of `align`.
 * The bit-mask form used here is only meaningful when `align` is a
 * power of two.
 */
static UINTN align_up(UINTN val, UINTN align)
{
    UINTN low_bits = align - 1;

    return (val + low_bits) & ~low_bits;
}

Allocation (kmalloc)

kmalloc performs a first-fit search of the block list, skipping blocks that are in use or too small:

/*
 * Allocate `size` bytes from the kernel heap.
 *
 * The request is rounded up to a multiple of HEAP_ALIGN and satisfied
 * from the first free block that is large enough (first fit).  When the
 * chosen block has room for the request plus another header and at
 * least HEAP_ALIGN payload bytes, the tail is split off as a new free
 * block.
 *
 * Returns a pointer to the payload, or NULL when:
 *   - the heap is not initialised or `size` is 0;
 *   - rounding `size` up to HEAP_ALIGN would wrap around;
 *   - a block with a bad magic value is encountered (heap corruption);
 *   - no free block is large enough.
 */
void *kmalloc(UINTN size)
{
    HeapBlock *block, *split;
    UINTN aligned;

    if (!heap_ready || size == 0) {
        return NULL;
    }

    /*
     * Reject sizes for which align_up would wrap.  Without this check a
     * request close to the maximum UINTN value rounds "up" to a tiny (or
     * zero) size and is answered with a block far smaller than asked for.
     */
    if (size > (UINTN)~(UINTN)0 - (HEAP_ALIGN - 1)) {
        return NULL;
    }

    aligned = align_up(size, HEAP_ALIGN);

    for (block = heap_start; block != NULL; block = block->next) {
        if (block->magic != HEAP_BLOCK_MAGIC) {
            return NULL;            /* heap corruption */
        }

        if (block->state != HEAP_BLOCK_FREE || block->size < aligned) {
            continue;
        }

        /* Split only if the remainder can hold a header plus HEAP_ALIGN bytes */
        if (block->size >= aligned + sizeof(HeapBlock) + HEAP_ALIGN) {
            /* The new header sits immediately after this block's payload. */
            split = (HeapBlock *)((UINT8 *)block + sizeof(HeapBlock) + aligned);
            split->magic = HEAP_BLOCK_MAGIC;
            split->state = HEAP_BLOCK_FREE;
            split->size  = block->size - aligned - sizeof(HeapBlock);
            split->next  = block->next;
            split->prev  = block;

            if (block->next != NULL) {
                block->next->prev = split;
            }

            block->next = split;
            block->size = aligned;
        }

        block->state = HEAP_BLOCK_USED;
        return (void *)((UINT8 *)block + sizeof(HeapBlock));
    }

    return NULL;                    /* out of heap memory */
}

Notable details:

  • Corruption detection: HEAP_BLOCK_MAGIC is checked for each block visited; any mismatch aborts the search and returns NULL.
  • Splitting: if the free block is large enough, it is split into:
    • An allocated block of exactly aligned bytes.
    • A new trailing free block (split) with its own header.
  • Alignment: the returned pointer is the payload address, sizeof(HeapBlock) bytes past the start of the block header, and is aligned according to HEAP_ALIGN.

Freeing (kfree) and coalescing

kfree marks a block as free and then attempts to coalesce with neighboring free blocks to combat fragmentation:

/*
 * Release a block previously returned by kmalloc.
 *
 * The call is ignored when `ptr` is NULL, the heap is not ready, the
 * header in front of `ptr` has a bad magic value, or the block is not
 * currently marked used (for example a double free).
 *
 * After marking the block free it is merged with its list successor and
 * then with its list predecessor whenever those are valid free blocks.
 */
void kfree(void *ptr)
{
    HeapBlock *blk;
    HeapBlock *after;
    HeapBlock *before;

    if (!heap_ready || ptr == NULL) {
        return;
    }

    /* The header sits immediately in front of the payload. */
    blk = (HeapBlock *)((UINT8 *)ptr - sizeof(HeapBlock));

    if (blk->magic != HEAP_BLOCK_MAGIC) {
        return;                     /* bad pointer */
    }
    if (blk->state != HEAP_BLOCK_USED) {
        return;                     /* double-free */
    }

    blk->state = HEAP_BLOCK_FREE;

    /* Absorb the following block into this one. */
    after = blk->next;
    if (after != NULL
        && after->magic == HEAP_BLOCK_MAGIC
        && after->state == HEAP_BLOCK_FREE) {
        blk->size += sizeof(HeapBlock) + after->size;
        blk->next  = after->next;
        if (blk->next != NULL) {
            blk->next->prev = blk;
        }
    }

    /* Let the preceding block absorb this one (including anything merged above). */
    before = blk->prev;
    if (before != NULL
        && before->magic == HEAP_BLOCK_MAGIC
        && before->state == HEAP_BLOCK_FREE) {
        before->size += sizeof(HeapBlock) + blk->size;
        before->next  = blk->next;
        if (blk->next != NULL) {
            blk->next->prev = before;
        }
    }
}

The allocator never returns memory to the PMM; all heap pages remain reserved for heap use for the lifetime of the kernel.

Heap statistics

heap_get_stats walks the block list and aggregates total, used, and free payload bytes as well as the block count:

/*
 * Collect heap usage figures by walking the block list.
 *
 * total:      receives the sum of all block payload sizes.
 * used:       receives the payload bytes of blocks marked used.
 * free_mem:   receives the payload bytes of all other blocks.
 * num_blocks: receives the number of blocks visited.
 *
 * All four outputs are zeroed first and stay zero when the heap is not
 * ready.  The walk stops at the end of the list or at the first block
 * whose magic value is wrong.  None of the pointers may be NULL.
 */
void heap_get_stats(UINTN *total, UINTN *used, UINTN *free_mem,
                    UINTN *num_blocks)
{
    HeapBlock *cur;

    *total      = 0;
    *used       = 0;
    *free_mem   = 0;
    *num_blocks = 0;

    if (!heap_ready) {
        return;
    }

    cur = heap_start;
    while (cur != NULL && cur->magic == HEAP_BLOCK_MAGIC) {
        *num_blocks += 1;
        *total += cur->size;

        if (cur->state != HEAP_BLOCK_USED) {
            *free_mem += cur->size;
        } else {
            *used += cur->size;
        }

        cur = cur->next;
    }
}

These statistics are surfaced to the user via the mem and memtest commands.


Runtime memory diagnostics (mem and memtest)

The mem command (in commands.c) prints a snapshot of PMM and heap state by calling memory_print_stats. Access requires TASK_PRIV_KERNEL:

/*
 * Print a snapshot of PMM, heap and paging state through SAFE_PRINT.
 *
 * A caller task whose privilege is below TASK_PRIV_KERNEL is refused
 * with a "Permission denied" message.
 *
 * Boot: loader-provided boot information, used for printing.
 */
void memory_print_stats(BootInfo *Boot)
{
    UINTN h_total, h_used, h_free, h_blocks;
    UINTN p_total, p_free, p_used;
    Task *caller;

    /* Subsystem-level privilege enforcement: memory stats require KERNEL. */
    /* NOTE(review): when task_current() returns NULL the check is skipped and access is granted; presumably this covers calls made before tasking is up. Confirm this is intended. */
    caller = task_current();
    if (caller != NULL && task_get_privilege(caller) < TASK_PRIV_KERNEL) {
        SAFE_PRINT(Boot, L"Permission denied: memory stats require kernel privilege.\n\r");
        return;
    }

    /* PMM figures: used pages are derived from total minus free. */
    p_total = pmm_get_total_pages();
    p_free  = pmm_get_free_pages();
    p_used  = p_total - p_free;

    heap_get_stats(&h_total, &h_used, &h_free, &h_blocks);

    SAFE_PRINT(Boot, L"\n\r");
    SAFE_PRINT(Boot, L"Memory Statistics\n\r");
    SAFE_PRINT(Boot, L"================================================\n\r");
    /* (printing of the PMM and heap figures is elided in this excerpt) */
    ...
    SAFE_PRINT(Boot, L"Paging:\n\r");
    SAFE_PRINT(Boot, L"  CR3:  0x%lx\n\r", read_cr3());
    SAFE_PRINT(Boot, L"  Mode: 4-level (PML4)\n\r");
    SAFE_PRINT(Boot, L"\n\r");
}

The memtest command runs a scripted set of tests that exercise heap allocation, heap free/coalescing, and PMM single- and multi-page allocation. It also enforces TASK_PRIV_KERNEL:

/*
 * Handler for the memtest command.
 *
 * Runs a scripted sequence of heap and PMM tests and prints progress
 * through SAFE_PRINT.  A caller task whose privilege is below
 * TASK_PRIV_KERNEL is refused with a "Permission denied" message.
 *
 * Boot: loader-provided boot information, used for printing.
 * Args: command arguments; unused.
 */
static void cmd_memtest(BootInfo *Boot, CHAR16 *Args)
{
    /* ptrs and sizes are presumably consumed by the heap tests, which are elided in this excerpt. */
    void *ptrs[8];
    UINTN sizes[] = { 16, 64, 128, 256, 512, 1024, 2048, 4096 };
    UINTN i;
    UINT64 page;
    UINTN h_total, h_used, h_free, h_blocks;
    Task *caller;
    (void)Args;

    /* Subsystem-level privilege enforcement: memtest requires KERNEL. */
    /* NOTE(review): when task_current() returns NULL the check is skipped and the test runs; confirm this is intended. */
    caller = task_current();
    if (caller != NULL && task_get_privilege(caller) < TASK_PRIV_KERNEL) {
        SAFE_PRINT(Boot, L"Permission denied: memtest requires kernel privilege.\n\r");
        return;
    }

    SAFE_PRINT(Boot, L"\n\r");
    SAFE_PRINT(Boot, L"Memory Test\n\r");
    SAFE_PRINT(Boot, L"================================================\n\r");
    ...
    /* --- Heap allocation test --- */
    ...
    /* --- Heap free test --- */
    ...
    /* --- PMM page allocation test --- */
    ...
    /* --- Multi-page allocation test --- */
    ...
    SAFE_PRINT(Boot, L"\n\rAll memory tests completed.\n\r\n\r");
}

These commands provide a convenient way to validate memory subsystem behaviour from the Starling Terminal without needing an external debugger.