Flash 343 lines
// kernel: boot and main loop.
const initramfs = #import("initramfs")
const initramfs_backend = #import("initramfs_backend")
const fat32_backend = #import("fat32_backend")
const fdtable = #import("fdtable")
const task_layout = #import("task_layout")
// Output-interface selectors, passed as the first argument to the
// main_output* / trace_output_* externs. MU is the selector used for every
// boot-console write in this file (the Mini-UART sink). PL is handed to
// trace_output_kernel_pts — presumably the PL011 trace UART; confirm against
// the main_output implementation.
const MU i32 = 0
const PL i32 = 1
// Boot status lines render through the shared console_ui module (lib/
// console_ui/) — the one place a bracket tag or an ANSI color is spelled.
// `boot` binds the Mini-UART console as the sink, so each bring-up step logs
// as `boot.ok(...)` / `boot.skip(...)` / `boot.warn(...)`. Restyle the whole
// boot log by editing console_ui, not here. Cosmetic — none of these lines are
// grepped by the boot contract. (The userspace contract markers in fsh.zig /
// login_elf.zig still hand-roll the `[ OK ]` form; migrating them onto
// console_ui is a follow-up.)
const console_ui = #import("console_ui")
// Sink handed to console_ui for the Mini-UART boot console. The renderer
// produces slices while the kernel's main_output wants NUL-terminated
// strings, so each byte is forwarded on its own through main_output_char:
// no staging buffer is needed and the tight per-task kernel stack does not
// grow.
fn bootSink(bytes []u8) void {
    for ch in bytes {
        main_output_char(MU, ch)
    }
}
const boot = console_ui.logger(&bootSink)
// clone_flags value handed to copy_process when kernel_main_impl creates
// PID 1 as a kernel thread.
const KTHREAD u64 = 1
// IRQ numbers
const VC_AUX_IRQ u32 = 125
const NS_PHYS_TIMER_IRQ u32 = 30
// UART / utils
extern fn mini_uart_init() void
extern fn main_output(interface i32, str [*:0]u8) void
extern fn main_output_u64(interface i32, n u64) void
extern fn main_output_char(interface i32, ch u8) void
extern fn main_output_process(interface i32, p *mut task_layout.TaskStruct) void
extern fn delay(ticks u64) void
extern fn get_el() u32
// Generic timer
extern fn generic_timer_init() void
extern fn get_sys_count() u64
extern fn hwrng_init() void
// IRQ
extern fn enable_interrupt_gic(intid u32, core u32) void
extern fn irq_init_vectors() void
extern fn irq_enable() void
// Fork / sched
extern fn copy_process(clone_flags u64, fn_ptr u64, arg u64) i32
extern fn prepare_move_to_user_elf(blob_addr_kva u64, blob_size u64) i32
extern fn sched_init() void
extern fn schedule() void
extern var current ?*mut task_layout.TaskStruct
// Syscall table
extern fn sys_call_table_relocate() void
// Board-driver trampolines. kernel.zig became a named module (src/kernel.flash);
// its generated .zig lives in the build cache, so it can no longer reach the
// board bag by a relative @import. The thin C-ABI wrappers live in the build
// root (src/start.zig), which imports the board bag as a named module — the same
// role fork.zig's move_to_user_elf_argv plays for execve. Reached here by symbol.
extern fn board_irq_init() void
extern fn board_usb_init() i32
extern fn board_usb_poll() void
extern fn board_emmc2_init() i32
extern fn board_emmc2_write_block(lba u32, buf *[512]u8) i32
extern fn board_emmc2_read_block(lba u32, buf *mut [512]u8) i32
extern fn board_uart_poll_rx_into_console() void
// Trace
extern fn trace_init() void
extern fn trace_output_kernel_pts(interface i32) void
extern fn pl011_uart_init() void
extern fn ksyms_init() void
// Page allocator
extern fn mem_map_init() void
extern fn mem_map_reserve_below(end_pa u64) void
extern fn mem_map_reserve_above(start_pa u64) void
// PA marker emitted by both board linker scripts: the page just past the
// kernel image and its board-specific reserved regions (page tables on
// rpi4b; page tables + 64 MiB sdscratch on virt). Read at boot so the
// page allocator never returns a PA that overlaps the kernel image.
extern var _kernel_pa_end u8
const build_options = #import("build_options")
extern fn dump_free_count() u64
// Cross-core boot synchronization. Within this file only kernel_main_impl
// touches it: core 0 resets it to 0 at the end of bring-up, increments it
// once its timer IRQ is enabled, and requires it to equal 1 before starting
// the scheduler. Exported, so it may also be referenced by symbol elsewhere.
export var state u32 = 0
/// Body of PID 1. When it returns, entry.S performs a kernel_exit 0.
///
/// PID 1 is ELF-loaded: `/sbin/init` — the `pid1.elf` artifact baked into
/// the embedded initramfs — is looked up, and its bytes (already
/// TTBR1-mapped, so nothing is allocated) are passed to
/// `prepare_move_to_user_elf`, the loader shared with the exec-elf / flibc
/// test payloads. A missing image or a failed load is reported on the
/// Mini-UART console.
export fn kernel_process() void {
    const init_image = initramfs.locate("/sbin/init") catch null
    if (init_image == null) {
        main_output(MU, "PID 1: /sbin/init missing from initramfs\n")
        return
    }

    // Wire fd 0/1/2 to the console before control is handed to EL0, so
    // user space starts with stdio already attached to the mini-UART.
    // Console slots are refcount-exempt shared singletons (ptr=null,
    // kind=console): the three installs allocate no page and leave the
    // free-page baseline untouched. fork() inherits them via
    // fdtable.dupAll; execve() preserves them.
    const task *mut task_layout.TaskStruct = current.?
    var fd usize = 0
    while (fd < 3) {
        _ = fdtable.install(task, .console, null)
        fd += 1
    }

    const image = init_image.?
    const image_kva u64 = #intFromPtr(image.data.ptr)
    if (prepare_move_to_user_elf(image_kva, image.data.len) < 0) {
        main_output(MU, "PID 1: ELF load failed\n")
    }
}
// Scratch LBA for the EL1 block-I/O smoke check. Retargeted from
// LBA 34_816 to LBA 2064: with the single-partition layout from
// format_sd.sh, the old 34_816 falls inside the FAT32 data
// region and would collide with user files once the disk fills in.
// LBA 2064 sits in the FAT32 reserved-sector window
// (partition start LBA 2048 + 16 = 17th reserved sector, between the
// BPB at LBA 2048 and FAT1 around LBA 2080), which no FAT32 driver
// reads or writes. The 16-sector offset matches the BPB's
// `reserved_sec_cnt = 32` window minus the first BPB sector and the
// FSInfo at LBA 2049 — well clear of both. One-constant permanent fix.
const EMMC2_BLOCK_LBA u32 = 2064
// Block-I/O smoke check, run at EL1. A deterministic 512-byte pattern is
// written to EMMC2_BLOCK_LBA, read back through the same vtable, and
// compared byte for byte. A match prints `[PASS] emmc2-block`; any failure
// prints `[FAIL] emmc2-block` followed by a short reason tag (write / read /
// mismatch). Both buffers are locals on the kernel stack, so no page is
// allocated and the free-page baseline does not move.
// scripts/run_qemu_test.sh greps for `[FAIL] emmc2-block` and fails the run
// when it appears; the EL0 16/16 tally is unaffected because this scenario
// runs before PID 1 is forked.
fn run_emmc2_smoke() void {
    var pattern [512]u8 = undefined
    var readback [512]u8 = undefined

    // Byte n of the pattern is the low 8 bits of (n + 0x42).
    var fill usize = 0
    while (fill < 512) {
        pattern[fill] = #intCast((fill + 0x42) & 0xFF)
        fill += 1
    }

    main_output(MU, "[TEST] emmc2-block\n")

    const write_rc = board_emmc2_write_block(EMMC2_BLOCK_LBA, &pattern)
    if (write_rc != 0) {
        main_output(MU, "[FAIL] emmc2-block (write)\n")
        return
    }

    const read_rc = board_emmc2_read_block(EMMC2_BLOCK_LBA, &readback)
    if (read_rc != 0) {
        main_output(MU, "[FAIL] emmc2-block (read)\n")
        return
    }

    // The first differing byte ends the check.
    var at usize = 0
    while (at < 512) {
        if (readback[at] != pattern[at]) {
            main_output(MU, "[FAIL] emmc2-block (mismatch)\n")
            return
        }
        at += 1
    }

    main_output(MU, "[PASS] emmc2-block\n")
}
/// Per-core kernel entry point; `id` is the number of the calling core.
///
/// Core 0 performs the one-time bring-up inside the `id == 0` branch, in
/// this order: page allocator and its reservations, Mini-UART console and
/// boot banner, PL011 UART, exception vectors, interrupt controller, USB
/// gadget, kernel symbols, syscall table, trace subsystem, initramfs root,
/// EMMC2 block device (plus the optional smoke check and the FAT32 /mnt
/// mount), entropy source, and the optional free-page baseline dump. It then
/// enables the generic timer and interrupts, creates PID 1 with
/// `copy_process`, and remains in the idle loop as PID 0.
///
/// Every other core parks forever at the `while (id != 0)` gate. The
/// function contains no return path.
export fn kernel_main_impl(id u64) void {
// core 0 initializes mini-uart and handles uart interrupts
if (id == 0) {
// Page allocator bitmap zeroed first so anything later in bring-up
// can hit get_free_page without a lazy-init branch.
mem_map_init()
// Reserve PAs occupied by the kernel image so get_free_page never
// hands out a page that overlaps `.text` / `.data` / `.bss` /
// page tables / sdscratch. On rpi4b the kernel sits below the
// pool — reserve_below is a no-op. On virt the kernel is loaded
// inside the pool window and the reservation is load-bearing.
mem_map_reserve_below(#intFromPtr(&_kernel_pa_end))
// Cap the pool at the actual RAM end on virt (QEMU `-m 1G` ⇒
// RAM ends at 0x80000000, well below MALLOC_END's RPi-derived
// 0xFC000000). Without this, an exhausting allocator path would
// hand out PAs that map to nothing once the in-RAM half is full.
if (build_options.board == .virt) {
mem_map_reserve_above(0x80000000)
}
// Mini-UART first so the boot status lines land on the same cable
// (pin 14/15) as the exception handler's "ERROR CAUGHT" output.
mini_uart_init()
boot.ok("Initialized Mini-UART console")
// Startup banner right after the console comes up, so the log reads
// chronologically: core 0 is the first thing running, before any of
// the subsystem bring-up below. (Secondary cores park at the
// `while (id != 0)` gate and never reach here, so this is core-0 only.)
// The core number and exception level are each printed as a single
// character (value + '0'), so only values 0-9 render as a digit.
console_ui.tagged(&bootSink, console_ui.ok)
bootSink("Booted core ")
main_output_char(MU, #intCast(id + '0'))
bootSink(" (EL")
main_output_char(MU, #intCast(get_el() + '0'))
bootSink(")\n")
pl011_uart_init()
boot.ok("Initialized PL011 trace UART")
irq_init_vectors()
boot.ok("Loaded exception vectors")
// Board-specific GIC bring-up: GICv3 needs ICC_*_EL1 + per-core
// redistributor wakeup. Pi's GICv2 inlines to nothing.
board_irq_init()
enable_interrupt_gic(VC_AUX_IRQ, #intCast(id))
boot.ok("Enabled interrupt controller")
// USB-OTG gadget bring-up (DWC2). The device MMIO at 0xFE980000 is
// already device-mapped by boot.S, so this needs no page allocator.
// Fails soft on QEMU (no DWC2 device path) — bounded waits return
// -1 and the polled console simply never enumerates. Serviced from
// the PID-0 idle loop below.
if (board_usb_init() < 0) {
boot.skip("USB gadget (no controller)")
} else {
boot.ok("Started USB gadget")
}
ksyms_init()
boot.ok("Loaded kernel symbols")
sys_call_table_relocate()
boot.ok("Relocated syscall table")
trace_init()
boot.ok("Initialized trace subsystem")
trace_output_kernel_pts(PL)
boot.ok("Started kernel trace output")
// VFS root mount bring-up. initramfs_backend
// only sets pointers — no get_free_page — so it slots in ahead
// of the free-page baseline emit without shifting it. The FAT32
// /mnt mount is wired later, after board.emmc2.init() has wired
// block_dev.sd_dev (fat32_backend.init issues block reads).
initramfs_backend.init()
boot.ok("Mounted initramfs root")
// Block-device bring-up. On virt
// the memory-backed fake never fails — graceful degradation
// (log + continue) is still the contract for the rpi4b
// driver, which can fail on missing SD card.
// The smoke check below covers acceptance #2 + #7 in one
// shot: it exercises the BlockDev vtable end-to-end and
// proves init() wired `block_dev.sd_dev`.
if (board_emmc2_init() < 0) {
boot.skip("EMMC2 block device (init failed)")
} else {
boot.ok("Initialized EMMC2 block device")
// Pre-PID-1 block-device smoke — part of the boot-as-test path,
// gated so a clean (non-selftest) boot stays quiet.
if (build_options.boot_selftest) { run_emmc2_smoke() }
// FAT32 /mnt mount — needs block_dev.sd_dev, wired just
// above by board.emmc2.init(). Fails soft: a blank/bad
// disk leaves mount_table[1] null and /mnt/* resolves to
// ENOENT.
if (fat32_backend.init() < 0) {
boot.skip("/mnt (no FAT32 volume)")
} else {
boot.ok("Mounted /mnt (FAT32)")
// Permission overlay: init() parsed PERMS.TAB
// into the backend's table. A mounted volume without a
// parseable overlay is the loud anti-brick announcement:
// /mnt runs on defaults (shadow floored 0600 root:root)
// until the operator reseeds the overlay file.
if (!fat32_backend.overlay_ok) {
boot.warn("/mnt overlay missing - defaults active, shadow floored")
}
}
}
// Entropy source bring-up. Seeds the fallback generator
// from CNTPCT (readable from reset — independent of the
// generic-timer IRQ setup below), self-tests, and announces the
// active source. The announce line tees into the kernel log ring,
// where [TEST] rng asserts it later. Allocates nothing.
hwrng_init()
// Boot-time free-page baseline. Logged before any task is created
// so the user-space dumps later in the trace can be compared
// against this absolute reference.
if (build_options.boot_selftest) { _ = dump_free_count() }
// Reset the cross-core counter; it is bumped to 1 below, once this
// core's timer IRQ has been enabled.
state = 0
}
// single core for now
while (id != 0) {}
// NOTE(review): the reason for this fixed delay is not visible in this
// file — presumably it lets the preceding bring-up settle; confirm.
delay(30000)
// generic timer and timer IRQ (vectors already loaded on core 0)
generic_timer_init()
enable_interrupt_gic(NS_PHYS_TIMER_IRQ, #intCast(id))
irq_enable()
// let the next core run
state += 1
while (true) {
// Only core 0 with state == 1 proceeds; any other combination keeps
// spinning on this check.
if (id != 0 || state != 1) { continue }
sched_init()
// create pid 1, kernel threads don't need a user stack page
const res = copy_process(KTHREAD, #intFromPtr(&kernel_process), 0)
// A non-positive result is only reported; boot still falls through to
// the idle loop below.
if (res <= 0) {
main_output(MU, "fork error\n")
}
while (true) {
// Idle-path UART RX poll (PID 0) — defensive backstop. The AUX
// RX interrupt is the primary drain and reaches handle_irq on
// real hardware; this only catches a byte left between IRQ
// slots. No-op on virt.
board_uart_poll_rx_into_console()
board_usb_poll()
schedule()
}
}
}