509 lines
15 KiB
Markdown
509 lines
15 KiB
Markdown
## Cooperative tasking overview
|
||
|
||
The kernel uses a **cooperative** multitasking model implemented in `task.c`. Tasks (lightweight threads) must call `task_yield` explicitly to let others run; there is no preemptive timer interrupt that forces context switches.
|
||
|
||
The key components are:
|
||
|
||
- A fixed-size **PCB pool** (`Task tasks[TASK_MAX]`).
|
||
- A per-task **stack** allocated from the PMM.
|
||
- A **scheduler** that performs round-robin selection among READY tasks.
|
||
- A `context_switch` assembly routine that saves/restores callee-saved registers and the stack pointer.
|
||
|
||
---
|
||
|
||
## Module state and initialisation
|
||
|
||
The scheduler's global state is:
|
||
|
||
```30:35:/home/lochlan/Documents/Coding/c/os/task.c
|
||
static Task tasks[TASK_MAX]; /* PCB pool (static array) */
|
||
static Task *current_task = NULL;
|
||
static UINT32 next_pid = 0;
|
||
static BootInfo *task_boot = NULL;
|
||
static BOOLEAN task_ready = FALSE;
|
||
```
|
||
|
||
`task_init` is called from `kmain` after memory and IDT initialisation:
|
||
|
||
```62:97:/home/lochlan/Documents/Coding/c/os/task.c
|
||
void task_init(BootInfo *Boot)
|
||
{
|
||
UINTN i;
|
||
|
||
task_boot = Boot;
|
||
|
||
/* Clear all PCB slots */
|
||
for (i = 0; i < TASK_MAX; i++) {
|
||
tasks[i].state = TASK_STATE_FREE;
|
||
tasks[i].pid = 0;
|
||
tasks[i].privilege = TASK_PRIV_USER;
|
||
tasks[i].saved_rsp = 0;
|
||
tasks[i].stack_base = 0;
|
||
tasks[i].stack_pages = 0;
|
||
tasks[i].entry = NULL;
|
||
tasks[i].arg = NULL;
|
||
tasks[i].switches = 0;
|
||
tasks[i].name[0] = L'\0';
|
||
}
|
||
|
||
/*
|
||
* Task 0 = the currently running kernel core thread.
|
||
* It already has a stack (the kernel's boot stack), so we don't
|
||
* allocate one. Its saved_rsp will be filled in during the
|
||
* first context_switch call in task_yield().
|
||
*/
|
||
tasks[0].pid = next_pid++;
|
||
tasks[0].state = TASK_STATE_RUNNING;
|
||
tasks[0].privilege = TASK_PRIV_KERNEL;
|
||
tasks[0].switches = 1;
|
||
wstrcpy16(tasks[0].name, L"core", TASK_NAME_LEN);
|
||
|
||
current_task = &tasks[0];
|
||
task_ready = TRUE;
|
||
|
||
SAFE_PRINT(Boot, L" Tasks: scheduler ready (max %d tasks)\n\r",
|
||
(UINTN)TASK_MAX);
|
||
}
|
||
```
|
||
|
||
Important points:
|
||
|
||
- Task 0 represents the **kernel core thread**, which uses the boot-time stack provided by the loader. It receives `TASK_PRIV_KERNEL` privilege.
|
||
- No stack is allocated for task 0; its `saved_rsp` is populated the first time a context switch occurs.
|
||
- All other PCBs begin in `TASK_STATE_FREE` with `TASK_PRIV_USER`.
|
||
|
||
---
|
||
|
||
## Task creation and stack layout
|
||
|
||
New tasks are created via `task_create_with_priv` (or its wrapper `task_create`), which:
|
||
|
||
1. Checks that the caller is not escalating privilege beyond its own level.
|
||
2. Finds a free PCB slot.
|
||
3. Allocates a stack from the PMM.
|
||
4. Sets up an initial stack frame so that `context_switch` can "return" into a C trampoline function.
|
||
|
||
```121:211:/home/lochlan/Documents/Coding/c/os/task.c
|
||
Task *task_create_with_priv(const CHAR16 *name,
|
||
TaskEntryFn entry,
|
||
void *arg,
|
||
TaskPrivilege privilege)
|
||
{
|
||
Task *t = NULL;
|
||
UINTN i;
|
||
UINT64 stack_phys;
|
||
UINT64 *sp;
|
||
...
|
||
|
||
/* Subsystem-level privilege enforcement: prevent privilege escalation. */
|
||
{
|
||
Task *caller = task_current();
|
||
if (caller != NULL && privilege > task_get_privilege(caller)) {
|
||
return NULL;
|
||
}
|
||
}
|
||
|
||
/* Find a free PCB slot */
|
||
for (i = 0; i < TASK_MAX; i++) {
|
||
if (tasks[i].state == TASK_STATE_FREE) {
|
||
t = &tasks[i];
|
||
break;
|
||
}
|
||
}
|
||
...
|
||
|
||
/* Allocate stack pages from the physical memory manager */
|
||
stack_phys = pmm_alloc_pages(TASK_STACK_PAGES);
|
||
if (stack_phys == 0) {
|
||
return NULL; /* out of memory */
|
||
}
|
||
|
||
/* Fill in the PCB */
|
||
t->pid = next_pid++;
|
||
t->state = TASK_STATE_READY;
|
||
t->privilege = privilege;
|
||
t->entry = entry;
|
||
t->arg = arg;
|
||
t->switches = 0;
|
||
t->stack_base = stack_phys;
|
||
t->stack_pages = TASK_STACK_PAGES;
|
||
wstrcpy16(t->name, name != NULL ? name : L"unnamed", TASK_NAME_LEN);
|
||
|
||
/*
|
||
* Set up the initial stack frame so that context_switch() can
|
||
* "return" into task_trampoline().
|
||
*
|
||
* context_switch saves/restores (low → high on stack):
|
||
* flags, r15, r14, r13, r12, rbx, rbp (pushes)
|
||
* then `ret` pops the return address (→ trampoline)
|
||
*
|
||
* Above the return address we place a safety-net address
|
||
* (task_exit) so that if the trampoline or entry function does
|
||
* a bare `ret`, it lands in task_exit().
|
||
*/
|
||
sp = (UINT64 *)(stack_phys + TASK_STACK_SIZE);
|
||
|
||
/* Align stack top to 16 bytes */
|
||
sp = (UINT64 *)((UINT64)sp & ~0xFULL);
|
||
|
||
/* Safety-net return address for the trampoline */
|
||
*(--sp) = (UINT64)(UINTN)task_exit;
|
||
|
||
/* Return address for context_switch's `ret` → trampoline */
|
||
*(--sp) = (UINT64)(UINTN)task_trampoline;
|
||
|
||
/* Callee-saved registers – all zero for fresh task */
|
||
*(--sp) = 0; /* rbp */
|
||
*(--sp) = 0; /* rbx */
|
||
*(--sp) = 0; /* r12 */
|
||
*(--sp) = 0; /* r13 */
|
||
*(--sp) = 0; /* r14 */
|
||
*(--sp) = 0; /* r15 */
|
||
|
||
/* RFLAGS – interrupts enabled (IF = bit 9) */
|
||
*(--sp) = 0x202; /* flags */
|
||
|
||
t->saved_rsp = (UINT64)(UINTN)sp;
|
||
|
||
return t;
|
||
}
|
||
```
|
||
|
||
The convenience wrapper `task_create` inherits the calling task's privilege level:
|
||
|
||
```213:220:/home/lochlan/Documents/Coding/c/os/task.c
|
||
Task *task_create(const CHAR16 *name, TaskEntryFn entry, void *arg)
|
||
{
|
||
/* Inherit privilege from the calling task (kernel if no task context). */
|
||
Task *caller = task_current();
|
||
TaskPrivilege priv = (caller != NULL) ? task_get_privilege(caller)
|
||
: TASK_PRIV_KERNEL;
|
||
return task_create_with_priv(name, entry, arg, priv);
|
||
}
|
||
```
|
||
|
||
The effective stack layout (low to high addresses) after `task_create` is:
|
||
|
||
- Saved `flags`, `r15`, `r14`, `r13`, `r12`, `rbx`, `rbp` (pushed by `context_switch` semantics).
|
||
- Return address to `task_trampoline`.
|
||
- Safety-net return address to `task_exit`.
|
||
|
||
This design guarantees that:
|
||
|
||
- The first time the scheduler chooses this task, restoring registers and issuing `ret` will jump to `task_trampoline`.
|
||
- If the trampoline or entry function ever returns normally, execution will fall into `task_exit` rather than running off the end of the stack.
|
||
|
||
---
|
||
|
||
## Trampoline and task entry
|
||
|
||
The trampoline is a small C function that calls the user-supplied entry point and then terminates the task cleanly:
|
||
|
||
```105:116:/home/lochlan/Documents/Coding/c/os/task.c
|
||
static void task_trampoline(void)
|
||
{
|
||
Task *t = task_current();
|
||
if (t != NULL && t->entry != NULL) {
|
||
t->entry(t->arg);
|
||
}
|
||
task_exit();
|
||
/* Should never reach here, but just in case: */
|
||
for (;;) {
|
||
__asm__ __volatile__("hlt");
|
||
}
|
||
}
|
||
```
|
||
|
||
The entry function signature is:
|
||
|
||
```12:17:/home/lochlan/Documents/Coding/c/os/task.h
|
||
typedef void (*TaskEntryFn)(void *arg);
|
||
```
|
||
|
||
This makes a task analogous to a `pthread`:
|
||
|
||
- It receives an opaque `void *arg`.
|
||
- It runs arbitrary kernel code.
|
||
- On completion it returns to `task_trampoline`, which calls `task_exit`.
|
||
|
||
---
|
||
|
||
## Scheduling and `task_yield`
|
||
|
||
The scheduler is purely cooperative and uses a simple **round-robin** algorithm implemented by `schedule_next`:
|
||
|
||
```203:230:/home/lochlan/Documents/Coding/c/os/task.c
|
||
static Task *schedule_next(void)
|
||
{
|
||
UINTN start, idx, i;
|
||
|
||
if (current_task == NULL) {
|
||
return &tasks[0];
|
||
}
|
||
|
||
/* Find current task's index in the array */
|
||
start = (UINTN)(current_task - tasks);
|
||
|
||
/* Round-robin: scan from (current+1) wrapping around */
|
||
for (i = 1; i <= TASK_MAX; i++) {
|
||
idx = (start + i) % TASK_MAX;
|
||
if (tasks[idx].state == TASK_STATE_READY) {
|
||
return &tasks[idx];
|
||
}
|
||
}
|
||
|
||
/* No other ready task – stay with current if still runnable */
|
||
if (current_task->state == TASK_STATE_RUNNING ||
|
||
current_task->state == TASK_STATE_READY) {
|
||
return current_task;
|
||
}
|
||
|
||
/* Fallback to task 0 (kernel / shell) */
|
||
return &tasks[0];
|
||
}
|
||
```
|
||
|
||
`task_yield` is the public API that tasks call to give up the CPU:
|
||
|
||
```236:266:/home/lochlan/Documents/Coding/c/os/task.c
|
||
void task_yield(void)
|
||
{
|
||
Task *prev, *next;
|
||
|
||
if (!task_ready) {
|
||
return;
|
||
}
|
||
|
||
prev = current_task;
|
||
next = schedule_next();
|
||
|
||
if (next == prev) {
|
||
return; /* nothing else to switch to */
|
||
}
|
||
|
||
/* Mark the previous task as READY (still runnable) */
|
||
if (prev->state == TASK_STATE_RUNNING) {
|
||
prev->state = TASK_STATE_READY;
|
||
}
|
||
|
||
next->state = TASK_STATE_RUNNING;
|
||
next->switches++;
|
||
current_task = next;
|
||
|
||
/*
|
||
* context_switch saves callee-saved regs + flags on prev's stack,
|
||
* stores prev's RSP into prev->saved_rsp, loads next->saved_rsp
|
||
* into RSP, restores regs + flags, and `ret`s into next's code.
|
||
*/
|
||
context_switch(&prev->saved_rsp, next->saved_rsp);
|
||
}
|
||
```
|
||
|
||
The actual register-level state transition is performed by an external assembly function:
|
||
|
||
```18:22:/home/lochlan/Documents/Coding/c/os/task.h
|
||
void context_switch(UINT64 *prev_rsp, UINT64 next_rsp);
|
||
```
|
||
|
||
Conceptually, `context_switch`:
|
||
|
||
- Pushes callee-saved registers and FLAGS on the current stack.
|
||
- Stores the resulting stack pointer in `*prev_rsp`.
|
||
- Loads `next_rsp` into RSP.
|
||
- Pops registers and FLAGS from the new stack.
|
||
- Issues `ret`, returning into the next task's code.
|
||
|
||
---
|
||
|
||
## Task termination (`task_exit`)
|
||
|
||
Tasks terminate by calling `task_exit`, typically via the trampoline:
|
||
|
||
```272:305:/home/lochlan/Documents/Coding/c/os/task.c
|
||
void task_exit(void)
|
||
{
|
||
Task *prev, *next;
|
||
|
||
if (!task_ready) {
|
||
return;
|
||
}
|
||
|
||
prev = current_task;
|
||
prev->state = TASK_STATE_TERMINATED;
|
||
|
||
/* Free the stack memory back to the PMM */
|
||
if (prev->stack_base != 0 && prev->stack_pages != 0) {
|
||
pmm_free_pages(prev->stack_base, prev->stack_pages);
|
||
prev->stack_base = 0;
|
||
prev->stack_pages = 0;
|
||
}
|
||
|
||
/* Mark the PCB slot as free for reuse */
|
||
prev->state = TASK_STATE_FREE;
|
||
|
||
next = schedule_next();
|
||
if (next == prev) {
|
||
/* Shouldn't happen if task 0 (kernel) is always alive */
|
||
next = &tasks[0];
|
||
}
|
||
|
||
next->state = TASK_STATE_RUNNING;
|
||
next->switches++;
|
||
current_task = next;
|
||
|
||
/* One-way switch: we never return to the exited task */
|
||
context_switch(&prev->saved_rsp, next->saved_rsp);
|
||
|
||
/* Should never reach here */
|
||
for (;;) {
|
||
__asm__ __volatile__("hlt");
|
||
}
|
||
}
|
||
```
|
||
|
||
Key behaviours:
|
||
|
||
- The task's stack pages are returned to the PMM via `pmm_free_pages`.
|
||
- The PCB slot is recycled back to `TASK_STATE_FREE`.
|
||
- The subsequent `context_switch` is **one-way**: control never returns to the exited task.
|
||
|
||
---
|
||
|
||
## Waiting for tasks
|
||
|
||
Certain parts of the kernel (e.g., the Starling Terminal and some commands) need to wait for a worker task to finish. This is done cooperatively via `task_wait`:
|
||
|
||
```336:348:/home/lochlan/Documents/Coding/c/os/task.c
|
||
void task_wait(Task *t)
|
||
{
|
||
if (!task_ready || t == NULL) {
|
||
return;
|
||
}
|
||
|
||
/*
|
||
* Busy-wait cooperatively until the target task's PCB slot has
|
||
* been recycled back to FREE by task_exit().
|
||
*/
|
||
while (t->state != TASK_STATE_FREE) {
|
||
task_yield();
|
||
}
|
||
}
|
||
```
|
||
|
||
Because the scheduler is cooperative, this **busy-wait** loop is benign: it yields on each iteration, allowing the waited-on task to make progress and eventually call `task_exit`.
|
||
|
||
Example usage from the Starling Terminal:
|
||
|
||
```135:140:/home/lochlan/Documents/Coding/c/os/kernel.c
|
||
Task *cmd_task = execute_command(Boot, line, shell_priv);
|
||
|
||
/* If a command task was spawned, wait for it to finish. */
|
||
if (cmd_task != NULL) {
|
||
task_wait(cmd_task);
|
||
}
|
||
```
|
||
|
||
---
|
||
|
||
## Task inspection (`ps` and `tasktest`)
|
||
|
||
The `ps` command uses `task_print_list` to show current tasks. Access requires at least `TASK_PRIV_DRIVER`:
|
||
|
||
```407:439:/home/lochlan/Documents/Coding/c/os/task.c
|
||
void task_print_list(BootInfo *Boot)
|
||
{
|
||
UINTN i;
|
||
Task *caller;
|
||
|
||
/* Subsystem-level privilege enforcement: task list requires DRIVER. */
|
||
caller = task_current();
|
||
if (caller != NULL && task_get_privilege(caller) < TASK_PRIV_DRIVER) {
|
||
SAFE_PRINT(Boot, L"Permission denied: task list requires driver privilege.\n\r");
|
||
return;
|
||
}
|
||
|
||
SAFE_PRINT(Boot, L"\n\r");
|
||
SAFE_PRINT(Boot, L" PID STATE PRIV SWITCHES NAME\n\r");
|
||
SAFE_PRINT(Boot, L" --- ---------- ---- -------- ----\n\r");
|
||
|
||
for (i = 0; i < TASK_MAX; i++) {
|
||
if (tasks[i].state == TASK_STATE_FREE) {
|
||
continue;
|
||
}
|
||
|
||
SAFE_PRINT(Boot, L" %3d %-10s %4d %8d %s\n\r",
|
||
tasks[i].pid,
|
||
state_str(tasks[i].state),
|
||
(INT32)tasks[i].privilege,
|
||
tasks[i].switches,
|
||
tasks[i].name);
|
||
}
|
||
|
||
SAFE_PRINT(Boot, L"\n\r");
|
||
SAFE_PRINT(Boot, L" Active tasks: %d / %d\n\r",
|
||
task_count(), (UINTN)TASK_MAX);
|
||
SAFE_PRINT(Boot, L"\n\r");
|
||
}
|
||
```
|
||
|
||
The `tasktest` command in `commands.c` programmatically exercises the scheduler:
|
||
|
||
```400:435:/home/lochlan/Documents/Coding/c/os/commands.c
|
||
static void cmd_tasktest(BootInfo *Boot, CHAR16 *Args)
|
||
{
|
||
Task *t1, *t2, *t3;
|
||
UINTN i;
|
||
(void)Args;
|
||
...
|
||
t1 = task_create(L"worker-A", worker_task_fn, Boot);
|
||
t2 = task_create(L"worker-B", worker_task_fn, Boot);
|
||
t3 = task_create(L"worker-C", worker_task_fn, Boot);
|
||
...
|
||
SAFE_PRINT(Boot, L"\n\rYielding to let workers run:\n\r\n\r");
|
||
|
||
/* Yield enough times for all workers to complete (3 tasks x 3 steps) */
|
||
for (i = 0; i < 12; i++) {
|
||
task_yield();
|
||
}
|
||
|
||
SAFE_PRINT(Boot, L"\n\rTask list after test:\n\r");
|
||
task_print_list(Boot);
|
||
|
||
SAFE_PRINT(Boot, L"Task scheduler test completed.\n\r\n\r");
|
||
}
|
||
```
|
||
|
||
Each worker task:
|
||
|
||
- Prints a progress message.
|
||
- Calls `task_yield`.
|
||
- Repeats three times, then finishes.
|
||
|
||
This demonstrates how cooperative tasks interleave output and how `task_yield` drives scheduling.
|
||
|
||
---
|
||
|
||
## Privilege system
|
||
|
||
Each task carries a `TaskPrivilege` level defined in `task.h`:
|
||
|
||
```47:52:/home/lochlan/Documents/Coding/c/os/task.h
|
||
typedef enum {
|
||
TASK_PRIV_USER = 0,
|
||
TASK_PRIV_DRIVER = 1,
|
||
TASK_PRIV_KERNEL = 2,
|
||
} TaskPrivilege;
|
||
```
|
||
|
||
All tasks still execute in CPU ring 0; this is a **software-only** hierarchy used for access control decisions:
|
||
|
||
- `task_create_with_priv` prevents a caller from creating a task with a higher privilege than its own.
|
||
- Subsystem functions like `memory_print_stats`, `task_print_list`, and `request_shutdown` check the calling task's privilege before proceeding.
|
||
- The `kusr` command (`commands.c`) temporarily elevates a task to `TASK_PRIV_KERNEL` to run a privileged sub-command, then restores the original level.
|
||
|
||
Accessors:
|
||
|
||
- `task_get_privilege(Task *t)` – returns the task's current privilege level.
|
||
- `task_set_privilege(Task *t, TaskPrivilege p)` – changes it (no enforcement; callers are responsible).
|
||
|