Flash 356 lines
// sched: round-robin scheduler with priority counters.
// All extern-struct layouts (TaskStruct, CoreContext, MmStruct, UserPage,
// KeRegs) live in src/task_layout.zig as the single source of truth.
// The .S files (sched.S, entry.S) consume those layouts via raw offsets.
const layout = #import("task_layout")
const TaskStruct = layout.TaskStruct
const TASK_RUNNING = layout.TASK_RUNNING
const TASK_ZOMBIE = layout.TASK_ZOMBIE
const TASK_INTERRUPTIBLE = layout.TASK_INTERRUPTIBLE
const KTHREAD = layout.KTHREAD
const MAX_PAGE_COUNT = layout.MAX_PAGE_COUNT
const fdtable = #import("fdtable")
const NR_TASKS usize = 64
extern fn core_switch_to(prev *mut TaskStruct, next *mut TaskStruct) void
extern fn set_pgd(pgd u64) void
extern fn irq_enable() void
extern fn irq_disable() void
extern fn free_page(p u64) void
extern fn free_kernel_page(kp u64) void
// Internal callers (schedule, timer_tick) reach _schedule_impl through
// the patchable trampoline `_schedule` in
// src/trace/patchable_trampolines.S, so tracing fires on every
// scheduler entry, not only cross-module callers.
extern fn _schedule() void
var init_task TaskStruct = .{
.priority = 1,
.flags = KTHREAD,
}
export var current ?*mut TaskStruct = null
export var task [NR_TASKS]?*mut TaskStruct = .{null} ** NR_TASKS
export var nr_tasks i32 = 1
// Monotonic pid allocator. init_task occupies pid 0; first user fork is pid 1.
export var next_pid i32 = 1
export fn preempt_disable() void {
current.?.preempt_count += 1
}
export fn preempt_enable() void {
current.?.preempt_count -= 1
}
/// Index of the RUNNING task with the highest `counter`, or null if no
/// task is RUNNING. Ties broken by lower index (strict `>` means the
/// first equal-counter slot wins). Pure: walks the slice as-is, no
/// mutation, no extern calls — host-testable.
pub fn pick_next_running(tasks []?*mut TaskStruct) ?usize {
var best ?usize = null
var best_c i64 = -1
var i usize = 0
while i < tasks.len {
if tasks[i] |p| {
if p.state == TASK_RUNNING && p.counter > best_c {
best_c = p.counter
best = i
}
}
i += 1
}
return best
}
/// Refill every non-null task's counter to `(counter >> 1) + priority`.
/// Called when the highest-counter RUNNING task has counter == 0 (round-
/// end). `counter` is i64 — `>>` is arithmetic, so an over-decremented
/// counter halves toward zero without flipping sign.
pub fn refill_counters(tasks []?*mut TaskStruct) void {
var i usize = 0
while i < tasks.len {
if tasks[i] |p| {
p.counter = (p.counter >> 1) + p.priority
}
i += 1
}
}
/// Flip `t` to ZOMBIE and wake an INTERRUPTIBLE parent. Caller must
/// hold preempt_disable. Pure state mutation — no scheduling, no page
/// frees. Shared between sys_kill (target-task) and exit_process (self).
pub fn zombify_and_wake_parent(t *mut TaskStruct) void {
t.state = TASK_ZOMBIE
if t.parent |p| {
if p.state == TASK_INTERRUPTIBLE {
p.state = TASK_RUNNING
}
}
}
export fn _schedule_impl() void {
preempt_disable()
var idx usize = 0
outer: while true {
if pick_next_running(&task) |i| {
if task[i].?.counter != 0 {
idx = i
break :outer
}
}
refill_counters(&task)
}
switch_to(task[idx].?)
preempt_enable()
}
export fn schedule() void {
current.?.counter = 0
_schedule()
}
export fn switch_to(next *mut TaskStruct) void {
if current == next {
return
}
const prev = current.?
current = next
// Kernel threads (mm.pgd == 0) share the boot-time id_pg_dir for
// TTBR0; writing 0 there would unmap low memory and instantly fault
// on the next ret to a low-VA kernel function (e.g. ret_from_fork).
if next.mm.pgd != 0 {
set_pgd(next.mm.pgd)
}
core_switch_to(prev, next)
}
export fn timer_tick() void {
const cur = current.?
cur.counter -= 1
if cur.counter > 0 || cur.preempt_count > 0 {
return
}
cur.counter = 0
irq_enable()
_schedule()
irq_disable()
}
export fn exit_process() void {
// Self-reap is unsafe here — the kernel page IS the running task's
// stack + TaskStruct; the parent's sys_wait does the page sweep.
preempt_disable()
zombify_and_wake_parent(current.?)
preempt_enable()
schedule()
}
// Walk every slot of `list` and free the contained physical page if it
// is non-zero. `field` selects the page address: pass null for slot
// entries that are bare `u64` (kernel_pages), pass a field name for
// struct slots (user_pages → "pa"). Inlined: the comptime field selector
// is the whole point.
inline fn free_page_list(list anytype, comptime field ?[]u8) void {
var j usize = 0
while j < list.len {
var v u64 = undefined
if field |f| {
v = #field(list[j], f)
} else {
v = list[j]
}
if v != 0 {
free_page(v)
}
j += 1
}
}
// Free a task's entire user address space: the user-mapped physical pages
// followed by the page-table pages (PGD/PUD/PMD/PTE live in kernel_pages).
// This is the mm-sweep half of do_wait_impl's zombie reap, factored out so
// fork's copy_process failure paths can release a partially- or fully-built
// child mm without duplicating the loop. It does NOT touch fds (the caller
// closes those first) or the TaskStruct page itself. A KTHREAD child never
// populated an mm, so both lists are empty and this is a no-op for it.
export fn release_user_mm(t *mut TaskStruct) void {
free_page_list(&t.mm.user_pages, "pa")
free_page_list(&t.mm.kernel_pages, null)
}
// Walk task[] for any child of `current`. If a zombie is found, free its
// resources and return its pid. If children exist but none are zombies,
// block (TASK_INTERRUPTIBLE) and retry on wake. Returns -1 if no children.
export fn do_wait_impl() i32 {
preempt_disable()
while true {
var have_children bool = false
var i usize = 0
while i < NR_TASKS {
if task[i] |c| {
if c.parent == current.? {
have_children = true
if c.state == TASK_ZOMBIE {
const pid i32 = c.pid
// Close fds the zombie left open. unref drops
// each Pipe/File refcount and frees the backing
// page at refs == 0. Runs before the mm-page
// sweep: a refcounted Pipe/File page (not in
// user/kernel_pages) must not be double-freed.
fdtable.closeAll(c)
// Free the mm: user-mapped pages then the page-table
// pages (PGD/PUD/PMD/PTE). Shared with fork's
// copy_process failure-path cleanup.
release_user_mm(c)
// Drop the slot before freeing the kernel page so
// a stale pointer can never be observed.
task[i] = null
if c.kstack != 0 {
free_kernel_page(c.kstack)
}
free_kernel_page(#intFromPtr(c))
preempt_enable()
return pid
}
}
}
i += 1
}
if !have_children {
preempt_enable()
return -1
}
current.?.state = TASK_INTERRUPTIBLE
preempt_enable()
schedule()
preempt_disable()
}
}
export fn sched_init() void {
current = &init_task
task[0] = &init_task
}
// ---------------------------------------------------------------------
// Host tests. The pure helpers run against caller-
// owned TaskStruct fixtures + a local `tasks` slice, so the global
// `task` / `current` state is never touched and tests stay hermetic.
// ---------------------------------------------------------------------
const std = #import("std")
test "refill_counters: each non-null task gets (c>>1) + priority" {
var t1 TaskStruct = .{ .priority = 4, .counter = 10 }
var t2 TaskStruct = .{ .priority = 2, .counter = 0 }
var tasks [3]?*mut TaskStruct = .{ &t1, null, &t2 }
refill_counters(&tasks)
try std.testing.expectEqual(#as(i64, (10 >> 1) + 4), t1.counter)
try std.testing.expectEqual(#as(i64, 0 + 2), t2.counter)
}
test "refill_counters: null slots are skipped" {
var tasks [4]?*mut TaskStruct = .{ null, null, null, null }
refill_counters(&tasks)
}
test "refill_counters: negative counter halves via arithmetic shift" {
// counter is i64 — `>>` on signed types is arithmetic in Zig. Guards
// the assumption that an over-decremented counter (e.g. a kill
// racing the timer tick) still refills sanely.
var t TaskStruct = .{ .priority = 5, .counter = -3 }
var tasks [1]?*mut TaskStruct = .{&t}
refill_counters(&tasks)
// -3 >> 1 == -2 (arithmetic shift), + 5 == 3
try std.testing.expectEqual(#as(i64, 3), t.counter)
}
test "pick_next_running: returns index of highest-counter RUNNING task" {
var t0 TaskStruct = .{ .state = TASK_RUNNING, .counter = 5 }
var t1 TaskStruct = .{ .state = TASK_RUNNING, .counter = 9 }
var t2 TaskStruct = .{ .state = TASK_RUNNING, .counter = 7 }
var tasks [3]?*mut TaskStruct = .{ &t0, &t1, &t2 }
try std.testing.expectEqual(#as(?usize, 1), pick_next_running(&tasks))
}
test "pick_next_running: ignores ZOMBIE and INTERRUPTIBLE" {
var t0 TaskStruct = .{ .state = TASK_ZOMBIE, .counter = 99 }
var t1 TaskStruct = .{ .state = TASK_INTERRUPTIBLE, .counter = 50 }
var t2 TaskStruct = .{ .state = TASK_RUNNING, .counter = 3 }
var tasks [3]?*mut TaskStruct = .{ &t0, &t1, &t2 }
try std.testing.expectEqual(#as(?usize, 2), pick_next_running(&tasks))
}
test "pick_next_running: null when no RUNNING task exists" {
var t0 TaskStruct = .{ .state = TASK_ZOMBIE }
var tasks [2]?*mut TaskStruct = .{ &t0, null }
try std.testing.expectEqual(#as(?usize, null), pick_next_running(&tasks))
}
test "pick_next_running: first-match wins on counter ties" {
var t0 TaskStruct = .{ .state = TASK_RUNNING, .counter = 5 }
var t1 TaskStruct = .{ .state = TASK_RUNNING, .counter = 5 }
var tasks [2]?*mut TaskStruct = .{ &t0, &t1 }
// Strict `>` — later-equal cannot displace earlier, so t0 wins.
try std.testing.expectEqual(#as(?usize, 0), pick_next_running(&tasks))
}
test "zombify_and_wake_parent: child->ZOMBIE, INTERRUPTIBLE parent->RUNNING" {
var parent TaskStruct = .{ .state = TASK_INTERRUPTIBLE }
var child TaskStruct = .{ .state = TASK_RUNNING, .parent = &parent }
zombify_and_wake_parent(&child)
try std.testing.expectEqual(TASK_ZOMBIE, child.state)
try std.testing.expectEqual(TASK_RUNNING, parent.state)
}
test "zombify_and_wake_parent: RUNNING parent stays RUNNING" {
var parent TaskStruct = .{ .state = TASK_RUNNING }
var child TaskStruct = .{ .state = TASK_RUNNING, .parent = &parent }
zombify_and_wake_parent(&child)
try std.testing.expectEqual(TASK_ZOMBIE, child.state)
try std.testing.expectEqual(TASK_RUNNING, parent.state)
}
test "zombify_and_wake_parent: ZOMBIE parent stays ZOMBIE (orphan path)" {
var parent TaskStruct = .{ .state = TASK_ZOMBIE }
var child TaskStruct = .{ .state = TASK_RUNNING, .parent = &parent }
zombify_and_wake_parent(&child)
try std.testing.expectEqual(TASK_ZOMBIE, child.state)
try std.testing.expectEqual(TASK_ZOMBIE, parent.state)
}
test "zombify_and_wake_parent: null parent does not crash" {
var child TaskStruct = .{ .state = TASK_RUNNING, .parent = null }
zombify_and_wake_parent(&child)
try std.testing.expectEqual(TASK_ZOMBIE, child.state)
}
// host_stubs_sched.zig counts free_page calls so release_user_mm's sweep
// is observable: every non-zero user_pages[*].pa and kernel_pages[*] slot
// must be freed exactly once, empty slots skipped.
extern fn sched_free_count() u64
extern fn sched_reset_free_count() void
test "release_user_mm frees every populated user and kernel page" {
sched_reset_free_count()
var t TaskStruct = .{}
t.mm.user_pages[0].pa = 0x40001000
t.mm.user_pages[1].pa = 0x40002000
t.mm.kernel_pages[0] = 0x40003000 // pgd
t.mm.kernel_pages[1] = 0x40004000
release_user_mm(&t)
try std.testing.expectEqual(#as(u64, 4), sched_free_count())
}
test "release_user_mm is a no-op on an empty (KTHREAD) mm" {
sched_reset_free_count()
var t TaskStruct = .{}
release_user_mm(&t)
try std.testing.expectEqual(#as(u64, 0), sched_free_count())
}