diff --git a/docs/commands-and-terminal.md b/docs/commands-and-terminal.md new file mode 100644 index 0000000..a69aa85 --- /dev/null +++ b/docs/commands-and-terminal.md @@ -0,0 +1,491 @@ +## Starling Terminal and command pipeline + +User interaction with the kernel is mediated by the **Starling Terminal** and a command registry implemented in `kernel.c` and `commands.c`. + +- The **Starling Terminal** is a task that runs a read–eval–print loop, reading keystrokes via `BootInfo` services and dispatching commands. +- The **command subsystem** maintains a table of commands, each with a name, description, usage string, and handler function. +- Each command typically runs in its own task so that long-running work does not block the terminal. + +--- + +## Starling Terminal task + +The Starling Terminal is implemented as a task entry function in `kernel.c`: + +```47:163:/home/lochlan/Documents/Coding/c/os/kernel.c +static void starling_terminal_task(void *arg) +{ + StarlingContext *ctx = (StarlingContext *)arg; + BootInfo *Boot = NULL; + KeyEvent Key; + KSTATUS Status; + UINTN read_errors = 0; + CHAR16 line[128]; + UINTN len = 0; + UINTN depth = 0; + + if (ctx == NULL || ctx->Boot == NULL) { + return; + } + + Boot = ctx->Boot; + depth = ctx->depth; + + SAFE_PRINT(Boot, L"\n\r[Starling Terminal depth %d] ready.\n\r\n\r", depth); + SAFE_PRINT(Boot, L"starling> "); + + while (TRUE) { + /* Try non-blocking read first; yield to other tasks while idle */ + if (Boot->try_read_key != NULL) { + Status = Boot->try_read_key(&Key); + if (Status != 0) { + task_yield(); + continue; + } + } else if (Boot->read_key != NULL) { + Status = Boot->read_key(&Key); + } else { + SAFE_PRINT(Boot, L"Console input unavailable.\n\r"); + break; + } + + if (Status != 0) { + read_errors++; + if (read_errors == 1 || (read_errors % 64) == 0) { + SAFE_PRINT(Boot, L"read_key failed (status=%ld)\n\r", + (UINT64)Status); + } + continue; + } + read_errors = 0; + + if (Key.unicode_char == L'\r' || 
Key.unicode_char == L'\n') { + /* Enter pressed: execute the buffered command */ + line[len] = L'\0'; + SAFE_PRINT(Boot, L"\n\r"); + trim_spaces_inplace(line); + ... + } else { + Task *cmd_task = execute_command(Boot, line); + + /* If a command task was spawned, wait for it to finish. */ + if (cmd_task != NULL) { + task_wait(cmd_task); + } + + /* Reset for next command */ + len = 0; + SAFE_PRINT(Boot, L"starling> "); + } + } else if (Key.scan_code == 0x08 || Key.unicode_char == L'\b' || Key.unicode_char == 0x7F) { + /* Backspace */ + if (len > 0) { + len--; + SAFE_PRINT(Boot, L"\b \b"); + } + } else if (Key.unicode_char >= 32 && Key.unicode_char < 127) { + /* Printable ASCII */ + if (len < (sizeof(line) / sizeof(line[0]) - 1)) { + line[len++] = Key.unicode_char; + SAFE_PRINT(Boot, L"%c", Key.unicode_char); + } + } + } + + /* Free our context on exit (allocated by the spawner). */ + kfree(ctx); +} +``` + +Notable design choices: + +- **Non-blocking polling**: When `Boot->try_read_key` is available, the terminal uses it and calls `task_yield` if no key is present. This avoids monopolising the CPU while idle. +- **Line editing**: A fixed-size buffer `line[128]` accumulates ASCII characters. Backspace decrements `len` and erases the last character on screen. +- **Command execution**: When the user presses Enter, the line is trimmed and either: + - Handled as a built-in shell control command (`exit`, `starling`). + - Sent to `execute_command` for lookup and execution. +- **Nested shells**: The `starling` command spawns a nested Starling Terminal task with increased `depth`, allowing recursive shells. 
+ +--- + +## Spawning the terminal + +`kmain` spawns the initial Starling Terminal as its own task and then turns the core thread into an idle loop: + +```221:253:/home/lochlan/Documents/Coding/c/os/kernel.c +ctx = (StarlingContext *)kmalloc(sizeof(StarlingContext)); +if (ctx == NULL) { + SAFE_PRINT(Boot, L"Failed to allocate Starling Terminal context; starting inline.\n\r"); + StarlingContext inline_ctx; + inline_ctx.Boot = Boot; + inline_ctx.depth = 0; + starling_terminal_task(&inline_ctx); + return; +} + +ctx->Boot = Boot; +ctx->depth = 0; + +terminal_task = task_create(L"starling-term", starling_terminal_task, ctx); +if (terminal_task == NULL) { + SAFE_PRINT(Boot, L"Failed to start Starling Terminal task; falling back to kernel loop.\n\r"); + ... + starling_terminal_task(Boot); + return; +} + +SAFE_PRINT(Boot, L"[core] Started Starling Terminal (PID %d).\n\r", terminal_task->pid); + +/* Core thread becomes an idle loop, yielding to the terminal and others. */ +while (TRUE) { + task_yield(); +} +``` + +This ensures that: + +- The terminal runs as a **regular task** managed by the cooperative scheduler. +- The core thread remains available to run other tasks or future subsystems, rather than being permanently blocked in terminal I/O. 
+ +--- + +## Command registry (`commands[]`) + +The command registry is defined in `commands.c` as a static array: + +```73:144:/home/lochlan/Documents/Coding/c/os/commands.c +static Command commands[] = { + { + L"shutdown", + L"Shutdown the system", + L"Usage: shutdown\n\r Initiates a system shutdown using UEFI runtime services.", + cmd_shutdown + }, + { + L"help", + L"Display available commands", + L"Usage: help\n\r Lists all available commands with brief descriptions.", + cmd_help + }, + { + L"man", + L"Display manual page for a command", + L"Usage: man \n\r Shows detailed help for the specified command.", + cmd_man + }, + { + L"clear", + L"Clear the screen", + L"Usage: clear\n\r Clears the console screen.", + cmd_clear + }, + { + L"about", + L"Display system information", + L"Usage: about\n\r Shows information about this operating system.", + cmd_about + }, + { + L"mem", + L"Display memory statistics", + L"Usage: mem\n\r Shows physical memory, heap, and paging information.", + cmd_mem + }, + { + L"ps", + L"List running tasks", + L"Usage: ps\n\r Displays all active tasks with PID, state, and name.", + cmd_ps + }, + { + L"spawn", + L"Spawn a demo background task", + L"Usage: spawn [name]\n\r Creates a cooperative demo task.\n\r Optional argument sets the task name.", + cmd_spawn + }, + { + L"memtest", + L"Test memory allocation and deallocation", + ... + cmd_memtest + }, + { + L"tasktest", + L"Test task scheduler with multiple tasks", + ... + cmd_tasktest + }, + {NULL, NULL, NULL, NULL} /* sentinel */ +}; +``` + +Each `Command` entry includes: + +- `name` – the token typed at the prompt. +- `description` – a short summary used by `help`. +- `usage` – a longer description and usage details for `man`. 
+- `handler` – a function of type: + +```14:19:/home/lochlan/Documents/Coding/c/os/commands.h +typedef void (*CommandHandlerFn)(BootInfo *Boot, CHAR16 *Args); +``` + +To add a new command, follow the guide in the file header: + +```8:12:/home/lochlan/Documents/Coding/c/os/commands.c + * To add a new command: + * 1. Write a static handler cmd_foo(BootInfo *Boot, CHAR16 *Args) + * 2. Add a forward declaration above the table + * 3. Append an entry to commands[] (before the sentinel) +``` + +--- + +## Command execution pipeline + +The central function that processes a line of user input is `execute_command`: + +```493:557:/home/lochlan/Documents/Coding/c/os/commands.c +Task *execute_command(BootInfo *Boot, CHAR16 *Input) +{ + CHAR16 *cmd_start = NULL; + CHAR16 *args_start = NULL; + UINTN i = 0; + CommandTaskContext *ctx; + Task *t; + + if (Boot == NULL || Input == NULL) { + return NULL; + } + + trim_spaces_inplace(Input); + + if (Input[0] == L'\0') { + return NULL; + } + + /* Split input into command and argument strings */ + cmd_start = Input; + args_start = Input; + + /* Advance past the command keyword */ + while (*args_start != L'\0' && !is_space16(*args_start)) { + args_start++; + } + + /* NUL-terminate the command and skip leading whitespace in args */ + if (*args_start != L'\0') { + *args_start = L'\0'; + args_start++; + + /* skip leading whitespace in args */ + while (*args_start != L'\0' && is_space16(*args_start)) { + args_start++; + } + } + + /* Look up and dispatch the command */ + for (i = 0; commands[i].name != NULL; i++) { + if (ascii_streq_ci(cmd_start, commands[i].name)) { + /* Allocate a context block for the command task. 
*/ + ctx = (CommandTaskContext *)kmalloc(sizeof(CommandTaskContext)); + if (ctx == NULL) { + SAFE_PRINT(Boot, L"Failed to allocate command context; running in core thread.\n\r"); + commands[i].handler(Boot, args_start); + return NULL; + } + + ctx->Boot = Boot; + ctx->handler = commands[i].handler; + wstrcpy16_local(ctx->args, args_start, sizeof(ctx->args) / sizeof(ctx->args[0])); + + t = task_create(commands[i].name, command_task_entry, ctx); + if (t == NULL) { + SAFE_PRINT(Boot, L"Failed to create task for command '%s'; running in core thread.\n\r", + commands[i].name); + kfree(ctx); + commands[i].handler(Boot, args_start); + return NULL; + } + + SAFE_PRINT(Boot, L"[starling] spawned '%s' as PID %d\n\r", t->name, t->pid); + return t; + } + } + + /* Command not found */ + SAFE_PRINT(Boot, L"Unknown command: %s\n\r", cmd_start); + SAFE_PRINT(Boot, L"Type 'help' for a list of available commands.\n\r"); + return NULL; +} +``` + +Pipeline stages: + +1. **Normalisation**: + - `trim_spaces_inplace` removes leading/trailing spaces. + - Empty lines are ignored. +2. **Tokenisation**: + - `cmd_start` points to the command token. + - `args_start` is advanced past the command; the first whitespace is replaced with `L'\0'`, splitting the string in-place. + - Leading whitespace in `args_start` is skipped. +3. **Lookup**: + - `commands[]` is scanned for a name that matches `cmd_start` using case-insensitive `ascii_streq_ci`. +4. **Dispatch**: + - On match, a `CommandTaskContext` is allocated via `kmalloc` and filled with: + - `Boot` pointer. + - Handler function. + - A bounded copy of the argument string. + - A new task is created via `task_create` with: + - Task name = command name. + - Entry = `command_task_entry`. + - Argument = pointer to the context. + - If task creation fails, the command handler is run synchronously in the current thread as a fallback. 
+ +The terminal then optionally `task_wait`s on the returned `Task *`, serialising command execution from the user's perspective while still letting the scheduler run other tasks (e.g., background demos). + +--- + +## Command task entry and context + +Command handlers are executed in a dedicated task, whose entry function is `command_task_entry`: + +```44:50:/home/lochlan/Documents/Coding/c/os/commands.c +typedef struct { + BootInfo *Boot; + CommandHandlerFn handler; + CHAR16 args[128]; +} CommandTaskContext; + +static void command_task_entry(void *arg); +``` + +Implementation: + +```570:587:/home/lochlan/Documents/Coding/c/os/commands.c +static void command_task_entry(void *arg) +{ + CommandTaskContext *ctx = (CommandTaskContext *)arg; + BootInfo *Boot = NULL; + + if (ctx == NULL) { + return; + } + + Boot = ctx->Boot; + + if (ctx->handler != NULL) { + ctx->handler(Boot, ctx->args); + } + + /* Context was heap-allocated in execute_command. */ + kfree(ctx); +} +``` + +This design gives each command: + +- Its own stack, independent of the terminal. +- Its own argument buffer, isolated from the terminal's input buffer. +- Automatic cleanup of the context when the command finishes. + +--- + +## Built-in commands + +Some notable built-in handlers: + +- **System control**: + - `shutdown` → `cmd_shutdown` calls `Boot->shutdown` via `request_shutdown` to power off the machine. + - `clear` → `cmd_clear` uses `Boot->clear_screen` to wipe the console. +- **Information and help**: + - `help` → `cmd_help` calls `show_help` to print a formatted table of available commands. + - `man` → `cmd_man` prints the `usage` field for a specific command. + - `about` → `cmd_about` prints OS information and feature list. +- **Diagnostics**: + - `mem` → `cmd_mem` calls `memory_print_stats` to show PMM and heap state. + - `ps` → `cmd_ps` calls `task_print_list` to show current tasks. + - `memtest` → `cmd_memtest` exercises heap and PMM allocations. 
+ - `tasktest` → `cmd_tasktest` spawns multiple worker tasks to demonstrate cooperative scheduling. +- **Tasking demo**: + - `spawn` → `cmd_spawn` creates a demonstration task using `demo_task_fn`, which yields in a loop and reports progress. + +Examples: + +```248:252:/home/lochlan/Documents/Coding/c/os/commands.c +static void cmd_mem(BootInfo *Boot, CHAR16 *Args) +{ + (void)Args; + memory_print_stats(Boot); +} +``` + +```275:279:/home/lochlan/Documents/Coding/c/os/commands.c +static void cmd_ps(BootInfo *Boot, CHAR16 *Args) +{ + (void)Args; + task_print_list(Boot); +} +``` + +From a user's perspective: + +- Commands are discoverable via `help` and `man`. +- Many commands provide deep insight into internal subsystems (memory, tasks) without requiring external tooling. + +--- + +## Adding new commands + +To add a new command `foo`: + +1. **Declare the handler** near the top of `commands.c`: + +```32:41:/home/lochlan/Documents/Coding/c/os/commands.c +static void cmd_shutdown(BootInfo *Boot, CHAR16 *Args); +static void cmd_help(BootInfo *Boot, CHAR16 *Args); +... +static void cmd_tasktest(BootInfo *Boot, CHAR16 *Args); +/* Add: */ +static void cmd_foo(BootInfo *Boot, CHAR16 *Args); +``` + +2. **Implement the handler**: + +```200:207:/home/lochlan/Documents/Coding/c/os/commands.c +static void cmd_shutdown(BootInfo *Boot, CHAR16 *Args) +{ + (void)Args; + SAFE_PRINT(Boot, L"Shutting down...\n\r"); + request_shutdown(Boot); +} +``` + +Use this as a template for `cmd_foo`, replacing the body with your logic and using `SAFE_PRINT` for output. + +3. **Register the command** in `commands[]` before the sentinel: + +```140:143:/home/lochlan/Documents/Coding/c/os/commands.c + { + L"tasktest", + ... + cmd_tasktest + }, + {NULL, NULL, NULL, NULL} /* sentinel */ +``` + +Insert a new block above the sentinel: + +```c + { + L"foo", + L"One-line description", + L"Usage: foo [args]\n\r Detailed explanation...", + cmd_foo + }, +``` + +4. Rebuild and run. 
Typing `foo` at the `starling>` prompt will now execute your handler in its own task. + +This extensible design makes it straightforward to grow the OS with additional diagnostics, demos, or experimental subsystems, all accessible from the interactive shell. + diff --git a/docs/interrupts-and-exceptions.md b/docs/interrupts-and-exceptions.md new file mode 100644 index 0000000..4302264 --- /dev/null +++ b/docs/interrupts-and-exceptions.md @@ -0,0 +1,269 @@ +## Interrupt and exception handling overview + +The kernel's interrupt/exception handling is implemented in `idt.c` and a companion assembly file `isr.S` (not shown here). The design goals are: + +- Preserve the firmware's existing hardware interrupt handlers where possible. +- Override CPU **exception vectors 0–31** with kernel stubs that route into a C dispatcher. +- Print detailed diagnostics for exceptions and halt on unrecoverable faults. + +--- + +## IDT representation + +The x86-64 Interrupt Descriptor Table (IDT) is represented as an array of packed 16-byte entries: + +```22:37:/home/lochlan/Documents/Coding/c/os/idt.c +typedef struct { + UINT16 offset_low; /* bits 0-15 of handler address */ + UINT16 selector; /* code segment selector */ + UINT8 ist; /* interrupt stack table index */ + UINT8 type_attr; /* type and attributes */ + UINT16 offset_mid; /* bits 16-31 of handler address */ + UINT32 offset_high; /* bits 32-63 of handler address */ + UINT32 zero; /* reserved, must be zero */ +} __attribute__((packed)) IdtEntry; + +typedef struct { + UINT16 limit; + UINT64 base; +} __attribute__((packed)) IdtPtr; +``` + +Module state: + +```42:47:/home/lochlan/Documents/Coding/c/os/idt.c +static IdtEntry idt[IDT_SIZE]; +static BootInfo *gBoot = NULL; + +/* Defined in isr.S – one stub function per vector (0-255). */ +extern void (*isr_stub_table[])(void); +``` + +Each element of `isr_stub_table` is a low-level assembly stub that: + +- Saves CPU state into an `ISRFrame`. 
+- Calls the common C dispatcher `isr_handler`. +- Restores state and performs `iretq` (for IRQs) or loops into a halt (for fatal faults). + +--- + +## Installing IDT entries + +`idt_set_gate` encodes a handler function pointer into an IDT entry: + +```53:67:/home/lochlan/Documents/Coding/c/os/idt.c +static void idt_set_gate(UINTN index, void (*handler)(void)) +{ + UINT64 addr = (UINT64)(UINTN)handler; + UINT16 selector = 0; + + __asm__ __volatile__("mov %%cs, %0" : "=r"(selector)); + + idt[index].offset_low = (UINT16)(addr & 0xFFFF); + idt[index].selector = selector; + idt[index].ist = 0; + idt[index].type_attr = IDT_TYPE_INTERRUPT; + idt[index].offset_mid = (UINT16)((addr >> 16) & 0xFFFF); + idt[index].offset_high = (UINT32)((addr >> 32) & 0xFFFFFFFF); + idt[index].zero = 0; +} +``` + +`lidt` loads a new IDTR: + +```69:73:/home/lochlan/Documents/Coding/c/os/idt.c +static void lidt(const IdtPtr *idtr) +{ + __asm__ __volatile__("lidt (%0)" :: "r"(idtr)); +} +``` + +--- + +## Exception names + +For better diagnostics, the kernel maps exception vectors to human-readable names: + +```80:116:/home/lochlan/Documents/Coding/c/os/idt.c +static const CHAR16 *exception_name(UINTN vector) +{ + switch (vector) { + case 0: return L"Divide Error"; + case 1: return L"Debug"; + ... + case 14: return L"Page Fault"; + ... + case 30: return L"Security"; + case 31: return L"Reserved"; + default: return L"Unknown"; + } +} +``` + +These strings are used by `isr_handler` when printing exception banners. 
+ +--- + +## PIC helpers and halting + +The kernel provides minimal support for acknowledging legacy PIC interrupts and halting the CPU in fatal cases: + +```124:136:/home/lochlan/Documents/Coding/c/os/idt.c +static inline void outb(UINT16 port, UINT8 value) +{ + __asm__ __volatile__("outb %0, %1" :: "a"(value), "Nd"(port)); +} + +static void pic_eoi(UINTN vector) +{ + if (vector >= 40) { + outb(0xA0, 0x20); /* EOI to slave PIC */ + } + outb(0x20, 0x20); /* EOI to master PIC */ +} +``` + +`halt_forever` disables interrupts and executes `hlt` in an infinite loop: + +```138:144:/home/lochlan/Documents/Coding/c/os/idt.c +static void halt_forever(void) +{ + for (;;) { + __asm__ __volatile__("cli; hlt"); + } +} +``` + +--- + +## ISR dispatcher (`isr_handler`) + +`isr_handler` is the central C function that receives control from all exception and interrupt stubs: + +```150:177:/home/lochlan/Documents/Coding/c/os/idt.c +void isr_handler(ISRFrame *frame) +{ + UINT64 cr2 = 0; + + /* Hardware IRQs (vectors 32-47): send EOI and return */ + if (frame->vector >= 32 && frame->vector <= 47) { + pic_eoi(frame->vector); + return; + } + + /* CPU exceptions (vectors 0-31): print diagnostics and halt */ + if (gBoot != NULL && gBoot->print != NULL) { + gBoot->print(L"\n\rEXCEPTION: %d (%s)\n\r", frame->vector, + exception_name(frame->vector)); + gBoot->print(L" Error Code: 0x%lx\n\r", frame->error_code); + gBoot->print(L" RIP: 0x%lx CS: 0x%lx RFLAGS: 0x%lx\n\r", + frame->rip, frame->cs, frame->rflags); + } + + if (frame->vector == 14) { + __asm__ __volatile__("mov %%cr2, %0" : "=r"(cr2)); + if (gBoot != NULL && gBoot->print != NULL) { + gBoot->print(L" CR2: 0x%lx\n\r", cr2); + } + } + + halt_forever(); +} +``` + +The `ISRFrame` structure (defined in `idt.h`) contains the state saved by the assembly stubs, including: + +- Exception/interrupt vector number. +- Error code (if applicable). +- `RIP`, `CS`, and `RFLAGS` at the time of the fault. 
+ +The handler's behaviour is: + +- For **hardware IRQs** (vectors 32–47): + - Send an **End Of Interrupt** (EOI) to the PIC. + - Return to the interrupted context. +- For **CPU exceptions** (vectors 0–31): + - Print a diagnostic header with the vector number and name. + - Show error code and execution context (`RIP`, `CS`, `RFLAGS`). + - For page faults (vector 14), read and print CR2 (faulting virtual address). + - Halt the system via `halt_forever`. + +This makes debugging faults significantly easier when running under QEMU or on real hardware. + +--- + +## IDT initialisation (`idt_init`) + +`idt_init` is called from `kmain` early during boot: + +```183:214:/home/lochlan/Documents/Coding/c/os/idt.c +void idt_init(BootInfo *Boot) +{ + IdtPtr old_idtr; + IdtPtr idtr; + IdtEntry *old_idt = NULL; + UINTN i = 0; + + gBoot = Boot; + + /* Read the firmware's existing IDT so we can preserve its entries */ + __asm__ __volatile__("sidt %0" : "=m"(old_idtr)); + old_idt = (IdtEntry *)(UINTN)old_idtr.base; + + /* Copy the entire existing IDT first (preserves firmware IRQ handlers) */ + for (i = 0; i < IDT_SIZE; i++) { + if (old_idt != NULL && (i * sizeof(IdtEntry)) < (UINTN)(old_idtr.limit + 1)) { + idt[i] = old_idt[i]; + } else { + idt_set_gate(i, isr_stub_table[i]); + } + } + + /* Override only CPU exception vectors (0-31) with our handlers */ + for (i = 0; i < 32; i++) { + idt_set_gate(i, isr_stub_table[i]); + } + + idtr.limit = (UINT16)(sizeof(idt) - 1); + idtr.base = (UINT64)(UINTN)idt; + + lidt(&idtr); +} +``` + +The process is: + +1. **Capture firmware IDT**: + - `sidt` reads the current IDTR into `old_idtr`. + - `old_idt` is set to the base of the firmware's IDT. +2. **Copy firmware entries**: + - For all indices `i` where the address lies within the firmware's IDT limit, copy the existing entry into the kernel's `idt` array. + - For indices beyond the firmware's limit, install the kernel's own stub from `isr_stub_table`. +3. 
**Override CPU exceptions**: + - For vectors 0–31, call `idt_set_gate` with the kernel's stubs, ensuring that exceptions are always handled by `isr_handler`. +4. **Activate new IDT**: + - Populate `idtr` with the address and size of the kernel's `idt`. + - Call `lidt` to load the new IDT. + +This approach preserves any firmware-installed handlers for higher interrupt vectors (e.g., hardware IRQs or system-specific events), while guaranteeing full control over CPU exception handling. + +--- + +## Interaction with the rest of the kernel + +The IDT/ISR subsystem interacts with other parts of the kernel in the following ways: + +- **BootInfo access**: + - `idt_init` stores `Boot` in `gBoot` so that `isr_handler` can safely use `Boot->print` for diagnostics. +- **Memory subsystem**: + - `isr_handler` reads CR2 for page faults; combined with `paging_get_phys` from `memory.c`, this can be used to inspect paging state. +- **Tasks and scheduler**: + - The current implementation is **non-preemptive**: context switches happen only through explicit calls to `task_yield`, not timer interrupts. Exceptions can still interrupt a running task at the faulting instruction, but there is no timer tick driving the scheduler. +- **User-level diagnostics**: + - When an exception occurs, the on-screen diagnostics provide enough context to identify the type of fault and its location in the kernel, especially when used alongside a symbol-enabled build and external debugger. + +Future extensions might include: + +- Installing a periodic timer IRQ handler that calls `task_yield` to add preemptive scheduling. +- Extending `ISRFrame` and `isr_handler` with richer diagnostics or a kernel debugger stub. 
+ diff --git a/docs/memory-and-allocation.md b/docs/memory-and-allocation.md new file mode 100644 index 0000000..b508187 --- /dev/null +++ b/docs/memory-and-allocation.md @@ -0,0 +1,622 @@ +## Memory management overview + +The kernel's memory subsystem is implemented in `memory.c` and exposes three layers: + +- **Physical Memory Manager (PMM)** – a bitmap-based page-frame allocator over a fixed-size pool obtained from the loader at boot. +- **Paging helpers** – routines to walk and extend the live 4-level x86-64 page tables, map/unmap virtual addresses, and translate virtual to physical. +- **Heap allocator** – a first-fit free-list allocator with block splitting and bidirectional coalescing, backed by pages from the PMM. + +All three layers are wired together and brought up by `memory_init`: + +```515:522:/home/lochlan/Documents/Coding/c/os/memory.c +void memory_init(BootInfo *Boot) +{ + SAFE_PRINT(Boot, L"Initializing memory management...\n\r"); + pmm_init(Boot); + paging_init(Boot); + heap_init(Boot); + SAFE_PRINT(Boot, L"Memory management ready.\n\r\n\r"); +} +``` + +--- + +## Physical Memory Manager (PMM) + +### Design + +The PMM manages a pool of 4 KiB physical page frames acquired from the loader via `BootInfo->alloc_pages`. It uses a simple **bitmap** to track free vs. allocated pages: + +```27:33:/home/lochlan/Documents/Coding/c/os/memory.c +static UINT64 pmm_pool_base = 0; +static UINTN pmm_total_pages = 0; +static UINTN pmm_free_count = 0; +static UINT8 pmm_bitmap[PMM_POOL_PAGES / 8]; +static BOOLEAN pmm_ready = FALSE; +``` + +Each bit in `pmm_bitmap` corresponds to a single page in the pool: + +- **0** – page is free. +- **1** – page is allocated. 
+ +Helper functions manipulate these bits: + +```37:53:/home/lochlan/Documents/Coding/c/os/memory.c +static void pmm_set_bit(UINTN idx) +{ + pmm_bitmap[idx / 8] |= (UINT8)(1U << (idx % 8)); +} + +static void pmm_clear_bit(UINTN idx) +{ + pmm_bitmap[idx / 8] &= (UINT8)~(1U << (idx % 8)); +} + +static BOOLEAN pmm_test_bit(UINTN idx) +{ + return (pmm_bitmap[idx / 8] & (1U << (idx % 8))) != 0; +} +``` + +### Initialisation + +`pmm_init` obtains the underlying page pool from the loader and prepares the bitmap: + +```64:96:/home/lochlan/Documents/Coding/c/os/memory.c +void pmm_init(BootInfo *Boot) +{ + KSTATUS Status; + UINT64 pool_addr = 0; + UINTN i; + + /* Zero the bitmap – all pages start free */ + for (i = 0; i < sizeof(pmm_bitmap); i++) { + pmm_bitmap[i] = 0; + } + + if (Boot == NULL || Boot->alloc_pages == NULL) { + SAFE_PRINT(Boot, L"PMM: page allocator unavailable\n\r"); + return; + } + + Status = Boot->alloc_pages(PMM_POOL_PAGES, &pool_addr); + ... + + pmm_pool_base = (UINT64)pool_addr; + pmm_total_pages = PMM_POOL_PAGES; + pmm_free_count = PMM_POOL_PAGES; + pmm_ready = TRUE; + + SAFE_PRINT(Boot, L" PMM : %d pages (%d KB) at 0x%lx\n\r", + pmm_total_pages, + (pmm_total_pages * PAGE_SIZE) / 1024, + pmm_pool_base); +} +``` + +Instead of parsing the firmware's memory map, this OS delegates low-level page allocation to the loader via `BootInfo->alloc_pages`. The PMM then **sub-allocates** from this contiguous pool using its own bitmap. 
+ +### Single-page allocation + +`pmm_alloc_page` scans the bitmap for the first free page, marks it allocated, and returns the physical address: + +```98:116:/home/lochlan/Documents/Coding/c/os/memory.c +UINT64 pmm_alloc_page(void) +{ + UINTN i; + + if (!pmm_ready || pmm_free_count == 0) { + return 0; + } + + for (i = 0; i < pmm_total_pages; i++) { + if (!pmm_test_bit(i)) { + pmm_set_bit(i); + pmm_free_count--; + return pmm_pool_base + ((UINT64)i * PAGE_SIZE); + } + } + + return 0; +} +``` + +The corresponding free operation validates the address and clears the bit: + +```119:132:/home/lochlan/Documents/Coding/c/os/memory.c +void pmm_free_page(UINT64 phys_addr) +{ + UINTN idx; + + if (!pmm_ready) return; + if (phys_addr < pmm_pool_base) return; + + idx = (UINTN)((phys_addr - pmm_pool_base) / PAGE_SIZE); + if (idx >= pmm_total_pages) return; + if (!pmm_test_bit(idx)) return; /* already free */ + + pmm_clear_bit(idx); + pmm_free_count++; +} +``` + +### Contiguous allocation + +For multi-page allocations, `pmm_alloc_pages` performs a **first-fit** search for a run of `count` consecutive free bits: + +```134:163:/home/lochlan/Documents/Coding/c/os/memory.c +UINT64 pmm_alloc_pages(UINTN count) +{ + UINTN i, j; + BOOLEAN found; + + if (!pmm_ready || count == 0 || count > pmm_total_pages + || pmm_free_count < count) { + return 0; + } + + for (i = 0; i + count <= pmm_total_pages; i++) { + found = TRUE; + for (j = 0; j < count; j++) { + if (pmm_test_bit(i + j)) { + found = FALSE; + i += j; /* skip past the used page */ + break; + } + } + if (found) { + for (j = 0; j < count; j++) { + pmm_set_bit(i + j); + } + pmm_free_count -= count; + return pmm_pool_base + ((UINT64)i * PAGE_SIZE); + } + } + + return 0; +} +``` + +`pmm_free_pages` simply calls `pmm_free_page` for each page in the range. + +--- + +## Paging helpers + +The paging layer operates directly on the current CR3 page table hierarchy and uses the PMM to allocate new page-table pages on demand. 
+ +### Reading CR3 and locating the PML4 + +```186:204:/home/lochlan/Documents/Coding/c/os/memory.c +static UINT64 read_cr3(void) +{ + UINT64 cr3; + __asm__ __volatile__("mov %%cr3, %0" : "=r"(cr3)); + return cr3; +} + +static void invlpg(UINT64 addr) +{ + __asm__ __volatile__("invlpg (%0)" :: "r"(addr) : "memory"); +} + +static UINT64 *get_pml4(void) +{ + return (UINT64 *)(UINTN)(read_cr3() & PTE_ADDR_MASK); +} +``` + +- `read_cr3` returns the physical address of the current PML4. +- `get_pml4` masks off flag bits using `PTE_ADDR_MASK` and casts the result to a pointer, assuming identity mapping of low physical memory (as set up by the loader). + +`paging_init` logs the initial CR3 value for diagnostic purposes: + +```244:249:/home/lochlan/Documents/Coding/c/os/memory.c +void paging_init(BootInfo *Boot) +{ + SAFE_PRINT(Boot, L" Page: CR3 = 0x%lx (identity-mapped by loader)\n\r", + read_cr3()); +} +``` + +### Walking page-table levels + +`paging_walk_level` abstracts a single step down the PML4 → PDPT → PD → PT hierarchy: + +```211:238:/home/lochlan/Documents/Coding/c/os/memory.c +static UINT64 *paging_walk_level(UINT64 *table, UINTN index, BOOLEAN create) +{ + UINT64 *next; + UINTN i; + UINT64 page; + + if (table[index] & PTE_PRESENT) { + return (UINT64 *)(UINTN)(table[index] & PTE_ADDR_MASK); + } + + if (!create) { + return NULL; + } + + page = pmm_alloc_page(); + if (page == 0) { + return NULL; + } + + /* Zero the freshly-allocated page table */ + next = (UINT64 *)(UINTN)page; + for (i = 0; i < PAGE_SIZE / sizeof(UINT64); i++) { + next[i] = 0; + } + + table[index] = page | PTE_PRESENT | PTE_WRITABLE; + return next; +} +``` + +If `create` is true and the entry is missing, it: + +- Allocates a fresh page with `pmm_alloc_page`. +- Clears it. +- Installs it as the next-level table with base address + default flags (`PTE_PRESENT | PTE_WRITABLE`). + +### Mapping and unmapping pages + +To map a single 4 KiB page, the kernel: + +1. 
Decomposes the virtual address into PML4/PDPT/PD/PT indices. +2. Walks or creates intermediate tables. +3. Installs a PTE with the desired flags. +4. Invalidates the TLB entry with `invlpg`. + +```256:285:/home/lochlan/Documents/Coding/c/os/memory.c +BOOLEAN paging_map_page(UINT64 virt, UINT64 phys, UINT64 flags) +{ + UINT64 *pml4, *pdpt, *pd, *pt; + UINTN pml4i, pdpti, pdi, pti; + + pml4i = (virt >> 39) & 0x1FF; + pdpti = (virt >> 30) & 0x1FF; + pdi = (virt >> 21) & 0x1FF; + pti = (virt >> 12) & 0x1FF; + + pml4 = get_pml4(); + + pdpt = paging_walk_level(pml4, pml4i, TRUE); + if (pdpt == NULL) return FALSE; + + /* 1 GB huge page – cannot carve a 4 KB mapping inside it */ + if (pdpt[pdpti] & PTE_HUGE) return FALSE; + + pd = paging_walk_level(pdpt, pdpti, TRUE); + if (pd == NULL) return FALSE; + + /* 2 MB huge page – cannot carve a 4 KB mapping inside it */ + if (pd[pdi] & PTE_HUGE) return FALSE; + + pt = paging_walk_level(pd, pdi, TRUE); + if (pt == NULL) return FALSE; + + pt[pti] = (phys & PTE_ADDR_MASK) | flags | PTE_PRESENT; + invlpg(virt); + return TRUE; +} +``` + +Unmapping follows the same index computation but stops early if an intermediate table or mapping is missing or a huge-page mapping is in place: + +```288:314:/home/lochlan/Documents/Coding/c/os/memory.c +void paging_unmap_page(UINT64 virt) +{ + UINT64 *pml4, *pdpt, *pd, *pt; + UINTN pml4i, pdpti, pdi, pti; + + pml4i = (virt >> 39) & 0x1FF; + pdpti = (virt >> 30) & 0x1FF; + pdi = (virt >> 21) & 0x1FF; + pti = (virt >> 12) & 0x1FF; + + pml4 = get_pml4(); + + pdpt = paging_walk_level(pml4, pml4i, FALSE); + if (pdpt == NULL) return; + if (pdpt[pdpti] & PTE_HUGE) return; + + pd = paging_walk_level(pdpt, pdpti, FALSE); + if (pd == NULL) return; + if (pd[pdi] & PTE_HUGE) return; + + pt = paging_walk_level(pd, pdi, FALSE); + if (pt == NULL) return; + + pt[pti] = 0; + invlpg(virt); +} +``` + +### Virtual-to-physical translation + +`paging_get_phys` walks the existing hierarchy without allocating anything, and 
supports 4 KiB, 2 MiB, and 1 GiB mappings: + +```320:351:/home/lochlan/Documents/Coding/c/os/memory.c +UINT64 paging_get_phys(UINT64 virt) +{ + UINT64 *pml4, *pdpt, *pd, *pt; + UINTN pml4i, pdpti, pdi, pti; + + pml4i = (virt >> 39) & 0x1FF; + pdpti = (virt >> 30) & 0x1FF; + pdi = (virt >> 21) & 0x1FF; + pti = (virt >> 12) & 0x1FF; + + pml4 = get_pml4(); + + if (!(pml4[pml4i] & PTE_PRESENT)) return 0; + pdpt = (UINT64 *)(UINTN)(pml4[pml4i] & PTE_ADDR_MASK); + + if (!(pdpt[pdpti] & PTE_PRESENT)) return 0; + if (pdpt[pdpti] & PTE_HUGE) { + /* 1 GB page */ + return (pdpt[pdpti] & 0x000FFFFFC0000000ULL) | (virt & 0x3FFFFFFFULL); + } + pd = (UINT64 *)(UINTN)(pdpt[pdpti] & PTE_ADDR_MASK); + + if (!(pd[pdi] & PTE_PRESENT)) return 0; + if (pd[pdi] & PTE_HUGE) { + /* 2 MB page */ + return (pd[pdi] & 0x000FFFFFFFE00000ULL) | (virt & 0x1FFFFFULL); + } + pt = (UINT64 *)(UINTN)(pd[pdi] & PTE_ADDR_MASK); + + if (!(pt[pti] & PTE_PRESENT)) return 0; + return (pt[pti] & PTE_ADDR_MASK) | (virt & 0xFFFULL); +} +``` + +This function is useful for diagnostics and for checking assumptions about how the firmware identity-mapped memory before entering the kernel. + +--- + +## Heap allocator + +The heap allocator builds on top of the PMM to provide `kmalloc`/`kfree` semantics. It uses a doubly linked list of **heap blocks** (`HeapBlock`), each containing metadata (`magic`, `state`, and `next`/`prev` links) and a `size` field describing the payload. 
+ +### Initialisation + +`heap_init` obtains an initial contiguous region of heap memory and seeds the block list with a single large free block: + +```370:394:/home/lochlan/Documents/Coding/c/os/memory.c +void heap_init(BootInfo *Boot) +{ + UINT64 phys; + UINTN heap_size; + + phys = pmm_alloc_pages(HEAP_INITIAL_PAGES); + if (phys == 0) { + SAFE_PRINT(Boot, L" Heap: failed to allocate pages\n\r"); + return; + } + + heap_size = HEAP_INITIAL_PAGES * PAGE_SIZE; + heap_start = (HeapBlock *)(UINTN)phys; + + heap_start->magic = HEAP_BLOCK_MAGIC; + heap_start->state = HEAP_BLOCK_FREE; + heap_start->size = heap_size - sizeof(HeapBlock); + heap_start->next = NULL; + heap_start->prev = NULL; + + heap_ready = TRUE; + + SAFE_PRINT(Boot, L" Heap: %d KB at 0x%lx\n\r", + heap_size / 1024, phys); +} +``` + +The allocator assumes that the physical address returned by `pmm_alloc_pages` is accessible via identity mapping, so it can cast it directly to a `HeapBlock *`. + +### Alignment helper + +Allocations are rounded up to a fixed alignment (e.g., 16 bytes) using `align_up`: + +```361:364:/home/lochlan/Documents/Coding/c/os/memory.c +static UINTN align_up(UINTN val, UINTN align) +{ + return (val + align - 1) & ~(align - 1); +} +``` + +### Allocation (`kmalloc`) + +`kmalloc` performs a **first-fit** search of the block list, skipping blocks that are in use or too small: + +```401:440:/home/lochlan/Documents/Coding/c/os/memory.c +void *kmalloc(UINTN size) +{ + HeapBlock *block, *split; + UINTN aligned; + + if (!heap_ready || size == 0) { + return NULL; + } + + aligned = align_up(size, HEAP_ALIGN); + + for (block = heap_start; block != NULL; block = block->next) { + if (block->magic != HEAP_BLOCK_MAGIC) { + return NULL; /* heap corruption */ + } + + if (block->state != HEAP_BLOCK_FREE || block->size < aligned) { + continue; + } + + /* Try to split if there is room for another header + 16 bytes */ + if (block->size >= aligned + sizeof(HeapBlock) + HEAP_ALIGN) { + split = (HeapBlock *)((UINT8 *)block + sizeof(HeapBlock) + aligned); + 
split->magic = HEAP_BLOCK_MAGIC; + split->state = HEAP_BLOCK_FREE; + split->size = block->size - aligned - sizeof(HeapBlock); + split->next = block->next; + split->prev = block; + + if (block->next != NULL) { + block->next->prev = split; + } + + block->next = split; + block->size = aligned; + } + + block->state = HEAP_BLOCK_USED; + return (void *)((UINT8 *)block + sizeof(HeapBlock)); + } + + return NULL; /* out of heap memory */ +} +``` + +Notable details: + +- **Corruption detection** – checks `HEAP_BLOCK_MAGIC` for each block; any mismatch aborts the search and returns `NULL`. +- **Splitting** – if the free block is large enough, it is split into: + - An allocated block of exactly `aligned` bytes. + - A new trailing free block (`split`) with its own header. +- **Alignment** – the returned pointer sits immediately after the block header (`sizeof(HeapBlock)` bytes past the start of the block). Payload sizes are rounded up to a multiple of `HEAP_ALIGN`, so the pointer itself is `HEAP_ALIGN`-aligned provided `sizeof(HeapBlock)` is also a multiple of `HEAP_ALIGN`. + +### Freeing (`kfree`) and coalescing + +`kfree` marks a block as free and then attempts to coalesce with neighbouring free blocks to combat fragmentation: + +```449:486:/home/lochlan/Documents/Coding/c/os/memory.c +void kfree(void *ptr) +{ + HeapBlock *block; + + if (ptr == NULL || !heap_ready) { + return; + } + + block = (HeapBlock *)((UINT8 *)ptr - sizeof(HeapBlock)); + + if (block->magic != HEAP_BLOCK_MAGIC || block->state != HEAP_BLOCK_USED) { + return; /* bad pointer or double-free */ + } + + block->state = HEAP_BLOCK_FREE; + + /* Coalesce with next neighbour */ + if (block->next != NULL + && block->next->magic == HEAP_BLOCK_MAGIC + && block->next->state == HEAP_BLOCK_FREE) { + block->size += sizeof(HeapBlock) + block->next->size; + block->next = block->next->next; + if (block->next != NULL) { + block->next->prev = block; + } + } + + /* Coalesce with previous neighbour */ + if (block->prev != NULL + && block->prev->magic == HEAP_BLOCK_MAGIC + && block->prev->state == HEAP_BLOCK_FREE) { + block->prev->size += sizeof(HeapBlock) + block->size; + block->prev->next = block->next; + if (block->next != 
NULL) { + block->next->prev = block->prev; + } + } +} +``` + +The allocator never returns memory to the PMM; all heap pages remain reserved for heap use for the lifetime of the kernel. + +### Heap statistics + +`heap_get_stats` walks the full block list (used and free blocks alike) and aggregates total, used, and free bytes as well as block count. The byte counts cover payload only (`size`); block headers are not included, and the walk stops at the first block whose magic value does not match: + +```488:508:/home/lochlan/Documents/Coding/c/os/memory.c +void heap_get_stats(UINTN *total, UINTN *used, UINTN *free_mem, + UINTN *num_blocks) +{ + HeapBlock *b; + + *total = 0; *used = 0; *free_mem = 0; *num_blocks = 0; + + if (!heap_ready) return; + + for (b = heap_start; b != NULL && b->magic == HEAP_BLOCK_MAGIC; + b = b->next) { + (*num_blocks)++; + *total += b->size; + if (b->state == HEAP_BLOCK_USED) { + *used += b->size; + } else { + *free_mem += b->size; + } + } +} +``` + +These statistics are surfaced to the user via the `mem` and `memtest` commands. + +--- + +## Runtime memory diagnostics (`mem` and `memtest`) + +The `mem` command (in `commands.c`) prints a snapshot of PMM and heap state by calling `memory_print_stats`: + +```525:562:/home/lochlan/Documents/Coding/c/os/memory.c +void memory_print_stats(BootInfo *Boot) +{ + UINTN h_total, h_used, h_free, h_blocks; + UINTN p_total, p_free, p_used; + + p_total = pmm_get_total_pages(); + p_free = pmm_get_free_pages(); + p_used = p_total - p_free; + + heap_get_stats(&h_total, &h_used, &h_free, &h_blocks); + + SAFE_PRINT(Boot, L"\n\r"); + SAFE_PRINT(Boot, L"Memory Statistics\n\r"); + SAFE_PRINT(Boot, L"================================================\n\r"); + ... 
+ SAFE_PRINT(Boot, L"Paging:\n\r"); + SAFE_PRINT(Boot, L" CR3: 0x%lx\n\r", read_cr3()); + SAFE_PRINT(Boot, L" Mode: 4-level (PML4)\n\r"); + SAFE_PRINT(Boot, L"\n\r"); +} +``` + +The `memtest` command runs a scripted set of tests that exercise heap allocation, heap free/coalescing, and PMM single- and multi-page allocation: + +```306:379:/home/lochlan/Documents/Coding/c/os/commands.c +static void cmd_memtest(BootInfo *Boot, CHAR16 *Args) +{ + void *ptrs[8]; + UINTN sizes[] = { 16, 64, 128, 256, 512, 1024, 2048, 4096 }; + UINTN i; + UINT64 page; + UINTN h_total, h_used, h_free, h_blocks; + (void)Args; + + SAFE_PRINT(Boot, L"\n\r"); + SAFE_PRINT(Boot, L"Memory Test\n\r"); + SAFE_PRINT(Boot, L"================================================\n\r"); + ... + /* --- Heap allocation test --- */ + ... + /* --- Heap free test --- */ + ... + /* --- PMM page allocation test --- */ + ... + /* --- Multi-page allocation test --- */ + ... + SAFE_PRINT(Boot, L"\n\rAll memory tests completed.\n\r\n\r"); +} +``` + +These commands provide a convenient way to validate memory subsystem behaviour from the Starling Terminal without needing an external debugger. + diff --git a/docs/overview.md b/docs/overview.md new file mode 100644 index 0000000..c0f2b83 --- /dev/null +++ b/docs/overview.md @@ -0,0 +1,297 @@ +## Overview + +This document explains the high-level control flow of the operating system – from firmware entry through to the interactive shell – and introduces the major subsystems that the rest of the documentation explores in depth. + +- **Boot loader**: a UEFI application implemented in `main.c` that reads `kernel.elf`, maps its segments, prepares the `BootInfo` interface, and jumps into the kernel. +- **Kernel entry**: `kmain` in `kernel.c`, which initialises the IDT, memory, and tasking subsystems, then spawns the Starling Terminal task. 
+- **Subsystems**: memory management (`memory.c`), cooperative multitasking (`task.c`), interrupt/exception handling (`idt.c`), and the command/terminal layer (`commands.c` + `kernel.c`). + +The remaining sections walk through this path step by step and show how these modules interact. + +--- + +## Firmware and boot loader (`main.c`) + +The system starts execution inside the UEFI firmware, which invokes the PE32+ entry point of the loader. GNU-EFI arranges for this to be the `efi_main` function in `main.c`: + +```298:345:/home/lochlan/Documents/Coding/c/os/main.c +EFI_STATUS +EFIAPI +efi_main(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) +{ + EFI_STATUS Status; + VOID *KernelImage = NULL; + UINTN KernelSize = 0; + UINT64 KernelEntry = 0; + BootInfo Boot; + KernelEntryFn EntryFn = NULL; + + /* Initialise the GNU-EFI library */ + InitializeLib(ImageHandle, SystemTable); + + Print(L"Loading kernel...\n\r"); + + Status = read_file_to_buffer(ImageHandle, L"\\kernel.elf", &KernelImage, &KernelSize); + ... + Status = load_elf_kernel(KernelImage, KernelSize, &KernelEntry); + ... + /* Populate the BootInfo struct with generic UEFI-backed services */ + Boot.print = Print; + Boot.clear_screen = loader_clear_screen; + ... + Boot.free_pages = loader_free_pages; + Boot.firmware_vendor = SystemTable->FirmwareVendor; + ... + + /* Jump to the kernel – this should not return */ + EntryFn = (KernelEntryFn)(UINTN)KernelEntry; + EntryFn(&Boot); + + Print(L"Kernel returned. Halting.\n\r"); + return EFI_SUCCESS; +} +``` + +Key steps performed by `efi_main`: + +- **Load kernel image**: `read_file_to_buffer` opens `\\kernel.elf` from the EFI System Partition and reads it into a pool buffer. 
+- **Parse ELF64**: `load_elf_kernel` verifies ELF headers and iterates PT\_LOAD segments, mapping each to its requested virtual/physical address via UEFI `AllocatePages`, then zero-fills `.bss`: + +```168:223:/home/lochlan/Documents/Coding/c/os/main.c +static EFI_STATUS load_elf_kernel(VOID *Image, UINTN Size, UINT64 *EntryOut) +{ + Elf64_Ehdr *Ehdr = (Elf64_Ehdr *)Image; + ... + for (Index = 0; Index < Ehdr->e_phnum; Index++) { + Elf64_Phdr *Segment = (Elf64_Phdr *)((UINT8 *)Phdr + (Index * Ehdr->e_phentsize)); + ... + Address = (EFI_PHYSICAL_ADDRESS)SegmentStart; + if (EFI_ERROR(uefi_call_wrapper(BS->AllocatePages, 4, AllocateAddress, + EfiLoaderData, SegmentPages, &Address))) { + return EFI_OUT_OF_RESOURCES; + } + + CopyMem((VOID *)(UINTN)Segment->p_vaddr, + (UINT8 *)Image + Segment->p_offset, + (UINTN)Segment->p_filesz); + if (Segment->p_memsz > Segment->p_filesz) { + SetMem((VOID *)(UINTN)(Segment->p_vaddr + Segment->p_filesz), + (UINTN)(Segment->p_memsz - Segment->p_filesz), 0); + } + } + + *EntryOut = Ehdr->e_entry; + return EFI_SUCCESS; +} +``` + +- **Prepare the kernel ABI**: `BootInfo` is a compact struct of function pointers and metadata shared between loader and kernel: + +```21:62:/home/lochlan/Documents/Coding/c/os/boot_info.h +typedef struct { + /* Console I/O */ + KernelPrintFn print; + ConsoleClearFn clear_screen; + ConsoleSetAttrFn set_attribute; + KeyReadFn read_key; + KeyReadFn try_read_key; + + /* System control */ + void (*shutdown)(void); + + /* Physical memory */ + KSTATUS (*alloc_pages)(UINTN pages, UINT64 *addr); + KSTATUS (*free_pages)(UINT64 addr, UINTN pages); + + /* Firmware metadata (for informational commands only). */ + const CHAR16 *firmware_vendor; + UINT32 firmware_major; + UINT32 firmware_minor; +} BootInfo; +``` + +UEFI-specific calls (console I/O, page allocation, shutdown) are wrapped in small adapter functions (`loader_clear_screen`, `loader_alloc_pages`, etc.) and stored in this struct. 
The kernel never calls firmware entry points directly; instead it depends only on `BootInfo`. + +Finally, the loader: + +- Casts the ELF entry point to `KernelEntryFn`. +- Invokes `EntryFn(&Boot)`, transferring control to the kernel. + +--- + +## Kernel entry and subsystem initialisation (`kernel.c`) + +The C-level kernel entry point is `kmain` in `kernel.c`. It receives a single `BootInfo *` argument from the loader: + +```169:254:/home/lochlan/Documents/Coding/c/os/kernel.c +void kmain(BootInfo *Boot) +{ + KSTATUS Status; + Task *terminal_task = NULL; + StarlingContext *ctx = NULL; + + if (Boot == NULL) { + return; + } + + if (Boot->clear_screen != NULL) { + Status = Boot->clear_screen(); + ... + } + + if (Boot->set_attribute != NULL) { + Status = Boot->set_attribute(TEXT_ATTR(COLOR_LIGHTGREEN, COLOR_BLACK)); + ... + } + + /* ---- Subsystem initialisation ---- */ + idt_init(Boot); + memory_init(Boot); + task_init(Boot); + + /* ---- Welcome banner ---- */ + SAFE_PRINT(Boot, L" Welcome to Simple 64-bit Operating System!\n\r"); + ... + SAFE_PRINT(Boot, L"Type 'help' for a list of commands.\n\r\n\r"); + + /* ---- Spawn Starling Terminal as its own task ---- */ + ctx = (StarlingContext *)kmalloc(sizeof(StarlingContext)); + ... + terminal_task = task_create(L"starling-term", starling_terminal_task, ctx); + if (terminal_task == NULL) { + ... + starling_terminal_task(Boot); + return; + } + + SAFE_PRINT(Boot, L"[core] Started Starling Terminal (PID %d).\n\r", terminal_task->pid); + + /* Core thread becomes an idle loop, yielding to the terminal and others. */ + while (TRUE) { + task_yield(); + } +} +``` + +`kmain` performs three major duties: + +1. **Console setup** – clears the screen and sets a green-on-black colour scheme using firmware-backed services from `BootInfo`. +2. **Subsystem initialisation** – calls: + - `idt_init(Boot)` to install the kernel's Interrupt Descriptor Table and exception handlers. 
+ - `memory_init(Boot)` to bring up the physical allocator, paging helpers, and heap. + - `task_init(Boot)` to bootstrap the cooperative scheduler and register the current thread as task 0. +3. **User interface** – prints a banner and spawns the Starling Terminal as a separate task via `task_create`, then turns the core thread into an idle loop that continuously `task_yield`s to allow other tasks to run. + +At this point, the system has: + +- A working IDT for CPU exceptions and IRQs. +- A memory stack providing page allocation, virtual mappings, and heap. +- A cooperative scheduler with at least two tasks: the core thread (task 0) and the terminal. + +--- + +## Starling Terminal and command dispatch + +Interactive user input is handled by the Starling Terminal task in `kernel.c`. It runs a read–eval–print loop that delegates command execution to `commands.c`: + +```47:163:/home/lochlan/Documents/Coding/c/os/kernel.c +static void starling_terminal_task(void *arg) +{ + StarlingContext *ctx = (StarlingContext *)arg; + BootInfo *Boot = NULL; + KeyEvent Key; + ... + + if (ctx == NULL || ctx->Boot == NULL) { + return; + } + + Boot = ctx->Boot; + depth = ctx->depth; + + SAFE_PRINT(Boot, L"\n\r[Starling Terminal depth %d] ready.\n\r\n\r", depth); + SAFE_PRINT(Boot, L"starling> "); + + while (TRUE) { + /* Try non-blocking read first; yield to other tasks while idle */ + if (Boot->try_read_key != NULL) { + Status = Boot->try_read_key(&Key); + if (Status != 0) { + task_yield(); + continue; + } + } else if (Boot->read_key != NULL) { + Status = Boot->read_key(&Key); + } else { + SAFE_PRINT(Boot, L"Console input unavailable.\n\r"); + break; + } + ... + if (Key.unicode_char == L'\r' || Key.unicode_char == L'\n') { + /* Enter pressed: execute the buffered command */ + line[len] = L'\0'; + SAFE_PRINT(Boot, L"\n\r"); + trim_spaces_inplace(line); + ... + } else { + Task *cmd_task = execute_command(Boot, line); + + /* If a command task was spawned, wait for it to finish. 
*/ + if (cmd_task != NULL) { + task_wait(cmd_task); + } + + /* Reset for next command */ + len = 0; + SAFE_PRINT(Boot, L"starling> "); + } + } + ... + } + + /* Free our context on exit (allocated by the spawner). */ + kfree(ctx); +} +``` + +Key points: + +- **Non-blocking idle**: when `try_read_key` returns no key, the terminal calls `task_yield()` so other tasks can run while the user is idle. +- **Line editing**: handles printable ASCII and backspace to maintain a simple line buffer (`line[128]`). +- **Command execution**: on Enter, the line is trimmed and passed to `execute_command(Boot, line)` in `commands.c`. If that function spawns a dedicated command task, the terminal waits for it via `task_wait`. +- **Nested terminals**: entering `starling` recursively spawns another Starling Terminal task with increased `depth`, demonstrating multi-level shells. + +The command registry and dispatch path are documented in detail in `commands-and-terminal.md`. + +--- + +## Subsystem overview + +The kernel is organised into focused subsystems, each in its own translation unit: + +- **Type layer** (`kernel_types.h`): defines fixed-width and utility types such as `UINT8`, `UINT64`, `UINTN`, and `CHAR16`, deliberately avoiding firmware headers so that core kernel code remains decoupled from UEFI. +- **Boot ABI** (`boot_info.h`): defines `BootInfo`, `KeyEvent`, and function pointer types (`KernelPrintFn`, `ConsoleClearFn`, etc.) forming the sole contract between loader and kernel. +- **Memory management** (`memory.c` + `memory.h`): + - PMM – bitmap-based page-frame allocator over a 16 MB pool obtained from the loader. + - Paging – helpers to walk and extend the 4-level x86-64 page tables, map/unmap pages, and translate virtual to physical addresses. + - Heap – first-fit free-list allocator with block splitting and coalescing, backed by PMM pages. +- **Tasks and scheduler** (`task.c` + `task.h`): + - Static process control block (PCB) pool. + - Cooperative round-robin scheduler. 
+ - Stack management and context switch support (via an external `context_switch` assembly routine). +- **Interrupts and exceptions** (`idt.c` + `idt.h`): + - IDT mirroring of firmware entries. + - Replacement of CPU exception vectors 0–31 with kernel stubs. + - Central `isr_handler` that prints diagnostics and halts on unrecoverable faults. +- **Commands and shell** (`commands.c` + `commands.h`): + - Command registry and help/man system. + - System control commands (`shutdown`, `about`, `mem`, `ps`). + - Test commands (`memtest`, `tasktest`, `spawn`) that exercise memory and scheduler subsystems in isolation. + +Each of these subsystems is covered in a dedicated document: + +- `memory-and-allocation.md` – PMM, paging, and heap internals. +- `tasks-and-scheduler.md` – task lifecycle, stacks, context switching, and scheduling. +- `interrupts-and-exceptions.md` – IDT construction, ISRs, and fault handling. +- `commands-and-terminal.md` – command pipeline from user input to handler execution. + diff --git a/docs/tasks-and-scheduler.md b/docs/tasks-and-scheduler.md new file mode 100644 index 0000000..5516f4a --- /dev/null +++ b/docs/tasks-and-scheduler.md @@ -0,0 +1,446 @@ +## Cooperative tasking overview + +The kernel uses a **cooperative** multitasking model implemented in `task.c`. Tasks (lightweight threads) must call `task_yield` explicitly to let others run; there is no preemptive timer interrupt that forces context switches. + +The key components are: + +- A fixed-size **PCB pool** (`Task tasks[TASK_MAX]`). +- A per-task **stack** allocated from the PMM. +- A **scheduler** that performs round-robin selection among READY tasks. +- A `context_switch` assembly routine that saves/restores callee-saved registers and the stack pointer. 
+ +--- + +## Module state and initialisation + +The scheduler's global state is: + +```30:35:/home/lochlan/Documents/Coding/c/os/task.c +static Task tasks[TASK_MAX]; /* PCB pool (static array) */ +static Task *current_task = NULL; +static UINT32 next_pid = 0; +static BootInfo *task_boot = NULL; +static BOOLEAN task_ready = FALSE; +``` + +`task_init` is called from `kmain` after memory and IDT initialisation: + +```62:97:/home/lochlan/Documents/Coding/c/os/task.c +void task_init(BootInfo *Boot) +{ + UINTN i; + + task_boot = Boot; + + /* Clear all PCB slots */ + for (i = 0; i < TASK_MAX; i++) { + tasks[i].state = TASK_STATE_FREE; + tasks[i].pid = 0; + tasks[i].saved_rsp = 0; + tasks[i].stack_base = 0; + tasks[i].stack_pages = 0; + tasks[i].entry = NULL; + tasks[i].arg = NULL; + tasks[i].switches = 0; + tasks[i].name[0] = L'\0'; + } + + /* + * Task 0 = the currently running kernel core thread. + * It already has a stack (the kernel's boot stack), so we don't + * allocate one. Its saved_rsp will be filled in during the + * first context_switch call in task_yield(). + */ + tasks[0].pid = next_pid++; + tasks[0].state = TASK_STATE_RUNNING; + tasks[0].switches = 1; + wstrcpy16(tasks[0].name, L"core", TASK_NAME_LEN); + + current_task = &tasks[0]; + task_ready = TRUE; + + SAFE_PRINT(Boot, L" Tasks: scheduler ready (max %d tasks)\n\r", + (UINTN)TASK_MAX); +} +``` + +Important points: + +- Task 0 represents the **kernel core thread**, which uses the boot-time stack provided by the loader. +- No stack is allocated for task 0; its `saved_rsp` is populated the first time a context switch occurs. +- All other PCBs begin in `TASK_STATE_FREE`. + +--- + +## Task creation and stack layout + +New tasks are created via `task_create`, which: + +1. Finds a free PCB slot. +2. Allocates a stack from the PMM. +3. Sets up an initial stack frame so that `context_switch` can "return" into a C trampoline function. 
+ +```121:197:/home/lochlan/Documents/Coding/c/os/task.c +Task *task_create(const CHAR16 *name, TaskEntryFn entry, void *arg) +{ + Task *t = NULL; + UINTN i; + UINT64 stack_phys; + UINT64 *sp; + ... + + /* Find a free PCB slot */ + for (i = 0; i < TASK_MAX; i++) { + if (tasks[i].state == TASK_STATE_FREE) { + t = &tasks[i]; + break; + } + } + ... + + /* Allocate stack pages from the physical memory manager */ + stack_phys = pmm_alloc_pages(TASK_STACK_PAGES); + if (stack_phys == 0) { + return NULL; /* out of memory */ + } + + /* Fill in the PCB */ + t->pid = next_pid++; + t->state = TASK_STATE_READY; + t->entry = entry; + t->arg = arg; + t->switches = 0; + t->stack_base = stack_phys; + t->stack_pages = TASK_STACK_PAGES; + wstrcpy16(t->name, name != NULL ? name : L"unnamed", TASK_NAME_LEN); + + /* + * Set up the initial stack frame so that context_switch() can + * "return" into task_trampoline(). + * + * context_switch saves/restores (low → high on stack): + * flags, r15, r14, r13, r12, rbx, rbp (pushes) + * then `ret` pops the return address (→ trampoline) + * + * Above the return address we place a safety-net address + * (task_exit) so that if the trampoline or entry function does + * a bare `ret`, it lands in task_exit(). 
+ */ + sp = (UINT64 *)(stack_phys + TASK_STACK_SIZE); + + /* Align stack top to 16 bytes */ + sp = (UINT64 *)((UINT64)sp & ~0xFULL); + + /* Safety-net return address for the trampoline */ + *(--sp) = (UINT64)(UINTN)task_exit; + + /* Return address for context_switch's `ret` → trampoline */ + *(--sp) = (UINT64)(UINTN)task_trampoline; + + /* Callee-saved registers – all zero for fresh task */ + *(--sp) = 0; /* rbp */ + *(--sp) = 0; /* rbx */ + *(--sp) = 0; /* r12 */ + *(--sp) = 0; /* r13 */ + *(--sp) = 0; /* r14 */ + *(--sp) = 0; /* r15 */ + + /* RFLAGS – interrupts enabled (IF = bit 9) */ + *(--sp) = 0x202; /* flags */ + + t->saved_rsp = (UINT64)(UINTN)sp; + + return t; +} +``` + +The effective stack layout (low to high addresses) after `task_create` is: + +- Saved `flags`, `r15`, `r14`, `r13`, `r12`, `rbx`, `rbp` (pushed by `context_switch` semantics). +- Return address to `task_trampoline`. +- Safety-net return address to `task_exit`. + +This design guarantees that: + +- The first time the scheduler chooses this task, restoring registers and issuing `ret` will jump to `task_trampoline`. +- If the trampoline or entry function ever returns normally, execution will fall into `task_exit` rather than running off the end of the stack. + +--- + +## Trampoline and task entry + +The trampoline is a small C function that calls the user-supplied entry point and then terminates the task cleanly: + +```105:116:/home/lochlan/Documents/Coding/c/os/task.c +static void task_trampoline(void) +{ + Task *t = task_current(); + if (t != NULL && t->entry != NULL) { + t->entry(t->arg); + } + task_exit(); + /* Should never reach here, but just in case: */ + for (;;) { + __asm__ __volatile__("hlt"); + } +} +``` + +The entry function signature is: + +```12:17:/home/lochlan/Documents/Coding/c/os/task.h +typedef void (*TaskEntryFn)(void *arg); +``` + +This makes a task analogous to a `pthread`: + +- It receives an opaque `void *arg`. +- It runs arbitrary kernel code. 
+- On completion it returns to `task_trampoline`, which calls `task_exit`. + +--- + +## Scheduling and `task_yield` + +The scheduler is purely cooperative and uses a simple **round-robin** algorithm implemented by `schedule_next`: + +```203:230:/home/lochlan/Documents/Coding/c/os/task.c +static Task *schedule_next(void) +{ + UINTN start, idx, i; + + if (current_task == NULL) { + return &tasks[0]; + } + + /* Find current task's index in the array */ + start = (UINTN)(current_task - tasks); + + /* Round-robin: scan from (current+1) wrapping around */ + for (i = 1; i <= TASK_MAX; i++) { + idx = (start + i) % TASK_MAX; + if (tasks[idx].state == TASK_STATE_READY) { + return &tasks[idx]; + } + } + + /* No other ready task – stay with current if still runnable */ + if (current_task->state == TASK_STATE_RUNNING || + current_task->state == TASK_STATE_READY) { + return current_task; + } + + /* Fallback to task 0 (kernel / shell) */ + return &tasks[0]; +} +``` + +`task_yield` is the public API that tasks call to give up the CPU: + +```236:266:/home/lochlan/Documents/Coding/c/os/task.c +void task_yield(void) +{ + Task *prev, *next; + + if (!task_ready) { + return; + } + + prev = current_task; + next = schedule_next(); + + if (next == prev) { + return; /* nothing else to switch to */ + } + + /* Mark the previous task as READY (still runnable) */ + if (prev->state == TASK_STATE_RUNNING) { + prev->state = TASK_STATE_READY; + } + + next->state = TASK_STATE_RUNNING; + next->switches++; + current_task = next; + + /* + * context_switch saves callee-saved regs + flags on prev's stack, + * stores prev's RSP into prev->saved_rsp, loads next->saved_rsp + * into RSP, restores regs + flags, and `ret`s into next's code. 
+ */ + context_switch(&prev->saved_rsp, next->saved_rsp); +} +``` + +The actual register-level state transition is performed by an external assembly function: + +```18:22:/home/lochlan/Documents/Coding/c/os/task.h +void context_switch(UINT64 *prev_rsp, UINT64 next_rsp); +``` + +Conceptually, `context_switch`: + +- Pushes callee-saved registers and FLAGS on the current stack. +- Stores the resulting stack pointer in `*prev_rsp`. +- Loads `next_rsp` into RSP. +- Pops registers and FLAGS from the new stack. +- Issues `ret`, returning into the next task's code. + +--- + +## Task termination (`task_exit`) + +Tasks terminate by calling `task_exit`, typically via the trampoline: + +```272:305:/home/lochlan/Documents/Coding/c/os/task.c +void task_exit(void) +{ + Task *prev, *next; + + if (!task_ready) { + return; + } + + prev = current_task; + prev->state = TASK_STATE_TERMINATED; + + /* Free the stack memory back to the PMM */ + if (prev->stack_base != 0 && prev->stack_pages != 0) { + pmm_free_pages(prev->stack_base, prev->stack_pages); + prev->stack_base = 0; + prev->stack_pages = 0; + } + + /* Mark the PCB slot as free for reuse */ + prev->state = TASK_STATE_FREE; + + next = schedule_next(); + if (next == prev) { + /* Shouldn't happen if task 0 (kernel) is always alive */ + next = &tasks[0]; + } + + next->state = TASK_STATE_RUNNING; + next->switches++; + current_task = next; + + /* One-way switch: we never return to the exited task */ + context_switch(&prev->saved_rsp, next->saved_rsp); + + /* Should never reach here */ + for (;;) { + __asm__ __volatile__("hlt"); + } +} +``` + +Key behaviours: + +- The task's stack pages are returned to the PMM via `pmm_free_pages`. +- The PCB slot is recycled back to `TASK_STATE_FREE`. +- The subsequent `context_switch` is **one-way**: control never returns to the exited task. + +--- + +## Waiting for tasks + +Certain parts of the kernel (e.g., the Starling Terminal and some commands) need to wait for a worker task to finish. 
This is done cooperatively via `task_wait`: + +```336:348:/home/lochlan/Documents/Coding/c/os/task.c +void task_wait(Task *t) +{ + if (!task_ready || t == NULL) { + return; + } + + /* + * Busy-wait cooperatively until the target task's PCB slot has + * been recycled back to FREE by task_exit(). + */ + while (t->state != TASK_STATE_FREE) { + task_yield(); + } +} +``` + +Because the scheduler is cooperative, this **busy-wait** loop is benign: it yields on each iteration, allowing the waited-on task to make progress and eventually call `task_exit`. + +Example usage from the Starling Terminal: + +```135:140:/home/lochlan/Documents/Coding/c/os/kernel.c +Task *cmd_task = execute_command(Boot, line); + +/* If a command task was spawned, wait for it to finish. */ +if (cmd_task != NULL) { + task_wait(cmd_task); +} +``` + +--- + +## Task inspection (`ps` and `tasktest`) + +The `ps` command uses `task_print_list` to show current tasks: + +```366:389:/home/lochlan/Documents/Coding/c/os/task.c +void task_print_list(BootInfo *Boot) +{ + UINTN i; + + SAFE_PRINT(Boot, L"\n\r"); + SAFE_PRINT(Boot, L" PID STATE SWITCHES NAME\n\r"); + SAFE_PRINT(Boot, L" --- ---------- -------- ----\n\r"); + + for (i = 0; i < TASK_MAX; i++) { + if (tasks[i].state == TASK_STATE_FREE) { + continue; + } + + SAFE_PRINT(Boot, L" %3d %-10s %8d %s\n\r", + tasks[i].pid, + state_str(tasks[i].state), + tasks[i].switches, + tasks[i].name); + } + + SAFE_PRINT(Boot, L"\n\r"); + SAFE_PRINT(Boot, L" Active tasks: %d / %d\n\r", + task_count(), (UINTN)TASK_MAX); + SAFE_PRINT(Boot, L"\n\r"); +} +``` + +The `tasktest` command in `commands.c` programmatically exercises the scheduler: + +```400:435:/home/lochlan/Documents/Coding/c/os/commands.c +static void cmd_tasktest(BootInfo *Boot, CHAR16 *Args) +{ + Task *t1, *t2, *t3; + UINTN i; + (void)Args; + ... 
+ t1 = task_create(L"worker-A", worker_task_fn, Boot); + t2 = task_create(L"worker-B", worker_task_fn, Boot); + t3 = task_create(L"worker-C", worker_task_fn, Boot); + ... + SAFE_PRINT(Boot, L"\n\rYielding to let workers run:\n\r\n\r"); + + /* Yield enough times for all workers to complete (3 tasks x 3 steps) */ + for (i = 0; i < 12; i++) { + task_yield(); + } + + SAFE_PRINT(Boot, L"\n\rTask list after test:\n\r"); + task_print_list(Boot); + + SAFE_PRINT(Boot, L"Task scheduler test completed.\n\r\n\r"); +} +``` + +Each worker task: + +- Prints a progress message. +- Calls `task_yield`. +- Repeats three times, then finishes. + +This demonstrates how cooperative tasks interleave output and how `task_yield` drives scheduling. +