// ajhahn.de / FlashOS — Flash, 402 lines
// initramfs: embedded newc cpio parser.
//
// Pure parser + host tests. Kernel-side integration lives in
// src/sys.zig (sys_openFile / sys_read) and src/kernel.zig
// (PID-1 ELF flip); the `.initramfs` linker section + `.incbin`
// build glue lives in src/board/<board>/linker.ld + build.zig.
//
// newc cpio reference: `man 5 cpio` "New ASCII Format". All numeric
// header fields are 8-byte ASCII hex (uppercase, no `0x` prefix). The
// 110-byte header is followed by the name (length = namesize, including
// the NUL terminator), padded so the next file-data byte sits on a
// 4-byte boundary. File data follows, also padded to 4. The archive
// ends with an entry named `TRAILER!!!` whose filesize is 0.

const std = #import("std")
const builtin = #import("builtin")

// Linker-provided section bounds. Defined by `.initramfs : { … }` in
// src/board/<board>/linker.ld. Host builds never reference
// these — the comptime branch in baseKva()/baseSize() reads the
// per-test fixture globals below instead.
// Marks the first byte of the section; only its address is ever taken
// (see baseKva() and baseSize()).
extern var __initramfs_start u8
// Marks the end of the section; baseSize() subtracts the start address
// from this symbol's address to get the archive length.
extern var __initramfs_end u8

// OR-ed into the link-time address of __initramfs_start by baseKva() on
// freestanding targets.
// NOTE(review): presumably the base of the kernel's linear mapping —
// confirm against the board memory map.
const LINEAR_MAP_BASE u64 = 0xFFFF000000000000

// Pointer to the first byte of the archive.
//   * host builds: the test-injected host_fixture_base;
//   * freestanding: the address of __initramfs_start with
//     LINEAR_MAP_BASE OR-ed in.
// The if/else shape is kept so only the branch selected by the build
// target refers to the linker symbols.
inline fn baseKva() [*]u8 {
    if builtin.target.os.tag != .freestanding {
        return host_fixture_base
    } else {
        const link_addr = #intFromPtr(&__initramfs_start)
        return #ptrFromInt(link_addr | LINEAR_MAP_BASE)
    }
}

// Length of the archive in bytes.
//   * host builds: the test-injected host_fixture_size;
//   * freestanding: distance between the two linker-provided symbols.
inline fn baseSize() usize {
    if builtin.target.os.tag != .freestanding {
        return host_fixture_size
    } else {
        const first = #intFromPtr(&__initramfs_start)
        const last = #intFromPtr(&__initramfs_end)
        return last - first
    }
}

// Host-test injection points. Tests set these before driving the public
// API. The freestanding branches above never read them; the symbols
// live in BSS for the few bytes they cost.
// Start of the archive bytes on non-freestanding builds; returned by
// baseKva(). Left undefined until a test assigns it.
pub var host_fixture_base [*]u8 = undefined
// Length in bytes of the host fixture; returned by baseSize().
pub var host_fixture_size usize = 0

// One archive member, as produced by Iterator.next() and locate().
pub const Entry = struct {
    // Borrows into the archive bytes — lifetime = archive.
    // `name` excludes the NUL terminator; a stored `./x` prefix has
    // already been reduced to `/x` by the iterator.
    name []u8,
    // The member's file bytes (filesize bytes taken from the archive).
    data []u8,
    // Value of the header's mode field, passed through unmodified.
    mode u32,
    // Owner ids from the newc header. The build encoder stamps
    // them (root today); the initramfs VFS backend forwards them into
    // OpenResult so the permission layer can gate non-root access.
    uid u32,
    gid u32,
}

// Failure modes of the parser:
//   InvalidHex   — a numeric header field holds a non-hex character;
//   BadMagic     — the first six header bytes differ from HEADER_MAGIC;
//   ShortArchive — a header, name, or data span would run past the end
//                  of the archive, or namesize is zero.
pub const ParseError = error{ InvalidHex, BadMagic, ShortArchive }

// Fixed header length: 6 magic bytes + 13 eight-byte hex fields.
const HEADER_SIZE usize = 110
// Byte sequence every header must begin with.
const HEADER_MAGIC = "070701"
// Name of the entry that terminates the archive.
const TRAILER = "TRAILER!!!"

// Byte offsets of the magic plus the five numeric header fields the
// parser reads. The other eight 8-byte fields
// (ino/nlink/mtime/dev*/check) are ignored. Each numeric offset is
// 6 (the magic length) + 8 * the field's index.
const OFF_MAGIC usize = 0
const OFF_MODE usize = 6 + 8 * 1
const OFF_UID usize = 6 + 8 * 2
const OFF_GID usize = 6 + 8 * 3
const OFF_FILESIZE usize = 6 + 8 * 6
const OFF_NAMESIZE usize = 6 + 8 * 11

// Decode one 8-character ASCII hex field into a u32, most significant
// digit first. Upper- and lower-case digits are both accepted; any
// other byte yields error.InvalidHex.
fn parseHex8(buf *[8]u8) ParseError!u32 {
    var acc u32 = 0
    for ch in buf {
        const nibble u32 = switch ch {
            '0'...'9' => #as(u32, ch - '0'),
            'A'...'F' => #as(u32, ch - 'A' + 10),
            'a'...'f' => #as(u32, ch - 'a' + 10),
            else => return error.InvalidHex,
        }
        // Shift the digits gathered so far up and append the new one.
        acc = (acc << 4) | nibble
    }
    return acc
}

// Round `x` up to the next multiple of four (unchanged when already
// aligned).
inline fn align4(x usize) usize {
    const mask usize = 3
    return (x + mask) & ~mask
}

// Byte-wise comparison primitive defined outside this file
// (utilc.mem_eql_bytes; see that helper for the strict-alignment
// rationale).
extern fn mem_eql_bytes(a [*]u8, b [*]u8, n u64) bool

// Slice equality: true when `a` and `b` have the same length and the
// same bytes. The external helper is only consulted once the lengths
// are known to match.
fn bytesEql(a []u8, b []u8) bool {
    return a.len == b.len && mem_eql_bytes(a.ptr, b.ptr, a.len)
}

// Forward-only walker over a newc archive image.
pub const Iterator = struct {
    // The complete archive being walked.
    archive []u8,
    // Byte offset of the next unread header inside `archive`.
    cursor usize = 0,

    // Decode the entry at `cursor` and advance past it.
    //
    // Returns the entry, or null once the `TRAILER!!!` entry is reached.
    // Errors: ShortArchive when the header, name, or data would extend
    // past the archive (or namesize is zero), BadMagic when the header
    // does not start with HEADER_MAGIC, InvalidHex when a numeric field
    // is malformed.
    pub fn next(self *mut Iterator) ParseError!?Entry {
        // The name begins right behind the fixed-size header, so this
        // offset is also where the header ends.
        const name_start = self.cursor + HEADER_SIZE
        if name_start > self.archive.len { return error.ShortArchive }
        const header = self.archive[self.cursor..][0..HEADER_SIZE]

        if !bytesEql(header[OFF_MAGIC..][0..6], HEADER_MAGIC) { return error.BadMagic }

        const mode = try parseHex8(header[OFF_MODE..][0..8])
        const uid = try parseHex8(header[OFF_UID..][0..8])
        const gid = try parseHex8(header[OFF_GID..][0..8])
        const filesize = try parseHex8(header[OFF_FILESIZE..][0..8])
        const namesize = try parseHex8(header[OFF_NAMESIZE..][0..8])
        // namesize includes the NUL terminator, so zero is never valid;
        // rejecting it here keeps the `- 1` below from underflowing.
        if namesize == 0 { return error.ShortArchive }

        // The name slice stops one byte short so the NUL is excluded.
        const name_end = name_start + namesize - 1
        if name_end > self.archive.len { return error.ShortArchive }
        const stored = self.archive[name_start..name_end]
        // Archives built from `find . -type f` store `./sbin/init`;
        // consumers expect `/sbin/init`, so drop the leading dot.
        // `TRAILER!!!` carries no `./` prefix and is left untouched.
        var name = stored
        if stored.len >= 2 && stored[0] == '.' && stored[1] == '/' { name = stored[1..] }

        // File data starts at the next 4-byte boundary after the name
        // (NUL included) and is itself padded to 4.
        const payload_start = align4(name_start + namesize)
        const payload_end = payload_start + filesize
        if payload_end > self.archive.len { return error.ShortArchive }

        // The cursor moves on even when this entry is the trailer.
        self.cursor = align4(payload_end)

        if bytesEql(name, TRAILER) { return null }
        return Entry{
            .name = name,
            .data = self.archive[payload_start..payload_end],
            .mode = mode,
            .uid = uid,
            .gid = gid,
        }
    }
}

// Create an Iterator positioned at the start of the archive described
// by baseKva() / baseSize().
pub fn iterator() Iterator {
    const bytes = baseKva()[0..baseSize()]
    return Iterator{ .archive = bytes, .cursor = 0 }
}

// Linear search for the entry whose canonical name equals `path`.
// Returns the first match, null when the trailer is reached without
// one, or the parse error that stopped the walk.
pub fn locate(path []u8) ParseError!?Entry {
    var walker = iterator()
    while try walker.next() |candidate| {
        if bytesEql(candidate.name, path) {
            return candidate
        }
    }
    return null
}

// Directory-synthesis helper for readdir. initramfs is a flat cpio: it
// stores files (`/bin/cat`, `/sbin/init`) and never directory entries —
// directories exist only as path prefixes. directEntry decides what (if
// anything) a stored `name` contributes to a listing of `prefix`:
//
//   * null when `name` does not live under `prefix`;
//   * the direct child segment otherwise, flagged is_dir=true when
//     `name` nests further below that child (so the child is a synthetic
//     subdirectory) and is_dir=false when the child IS `name`'s leaf.
//
// `prefix` must carry a trailing slash ("/bin/", or "/" for the root).
// The backend feeds stored names through this in cpio (sorted) order and
// collapses adjacent equal children, so the synthetic subdirs de-dup.
//
//   directEntry("/bin/cat",  "/bin/") -> { "cat", is_dir=false }
//   directEntry("/bin/cat",  "/")     -> { "bin", is_dir=true  }
//   directEntry("/sbin/init","/bin/") -> null
//
// Flash cannot spell an anonymous struct inline in a return type, so the
// result shape is named here (the inline `?struct { … }` of the Zig
// original).
const DirectChild = struct {
    // First path segment of `name` below `prefix` (contains no slash).
    child []u8,
    // True when `name` continues below `child`, i.e. the child is a
    // synthetic subdirectory rather than the stored file itself.
    is_dir bool,
}

// Work out what the stored `name` contributes to a listing of `prefix`
// (which must end in a slash). Returns null when `name` is not strictly
// below `prefix`.
pub fn directEntry(name []u8, prefix []u8) ?DirectChild {
    // At least one byte must remain after the prefix.
    if name.len <= prefix.len { return null }
    if !bytesEql(name[0..prefix.len], prefix) { return null }
    const tail = name[prefix.len..]
    // No further slash: the remainder is the leaf file itself.
    const slash = std.mem.indexOfScalar(u8, tail, '/') orelse return .{ .child = tail, .is_dir = false }
    // Otherwise only the first segment is listed, as a directory.
    return .{ .child = tail[0..slash], .is_dir = true }
}

// ---- Host tests ----
//
// buildFixture() is a comptime newc encoder used solely by the tests
// below. It mirrors the runtime encoder under scripts/build_initramfs.zig
// (only used if cpio(1) is proven non-deterministic), so any
// drift between encoder and decoder shows up here first.

// pub so initramfs_backend.zig's readdir host tests can build the same
// comptime cpio fixtures (comptime-only encoder; dead-code-eliminated
// from the freestanding build).
// Description of one file to encode into a comptime test archive via
// buildFixture().
pub const FixtureEntry = struct {
    // Name written to the archive (the encoder appends the NUL).
    name []u8,
    // File contents; the length becomes the header's filesize field.
    data []u8,
    // Value written to the header's mode field.
    mode u32,
    // Owner ids. Defaulted so pre-existing fixtures (which all
    // model root-owned files) stay unchanged; permission tests override.
    uid u32 = 0,
    gid u32 = 0,
}

// Format `v` at comptime as eight upper-case hex digits, zero-padded on
// the left — the layout of every numeric newc header field.
fn comptimeHex8(comptime v u32) []u8 {
    const text = std.fmt.comptimePrint("{X:0>8}", .{v})
    return text
}

// NUL bytes needed to bring a span of length `n` up to the next
// multiple of four: zero to three bytes.
fn padBytes(comptime n usize) []u8 {
    // (4 - n mod 4) mod 4, written with masks.
    const gap = (4 - (n & 3)) & 3
    return "\x00" ** gap
}

// Encode one fixture entry as header + NUL-terminated name + padding +
// data + padding, the same layout Iterator.next() decodes.
fn emitEntry(comptime e FixtureEntry) []u8 {
    const name_z = e.name ++ "\x00"
    const header = HEADER_MAGIC ++ // magic
        "00000001" ++ // ino
        comptimeHex8(e.mode) ++ // mode
        comptimeHex8(e.uid) ++ // uid
        comptimeHex8(e.gid) ++ // gid
        "00000001" ++ // nlink
        "00000000" ++ // mtime
        comptimeHex8(#intCast(e.data.len)) ++ // filesize
        ("00000000" ** 4) ++ // devmajor, devminor, rdevmajor, rdevminor
        comptimeHex8(#intCast(name_z.len)) ++ // namesize incl. NUL
        "00000000" // check
    // The header is not a multiple of four bytes long, so the name
    // padding is computed over header + name together.
    const after_name = padBytes(header.len + name_z.len)
    const after_data = padBytes(e.data.len)
    return header ++ name_z ++ after_name ++ e.data ++ after_data
}

// Encode the `TRAILER!!!` entry that terminates an archive: an
// all-zero header apart from nlink and namesize, followed by the padded
// name and no data.
fn emitTrailer() []u8 {
    const name_z = TRAILER ++ "\x00"
    const header = HEADER_MAGIC ++ // magic
        ("00000000" ** 4) ++ // ino, mode, uid, gid
        "00000001" ++ // nlink — GNU cpio writes 1 on the trailer too
        "00000000" ++ // mtime
        "00000000" ++ // filesize
        ("00000000" ** 4) ++ // devmajor, devminor, rdevmajor, rdevminor
        comptimeHex8(#intCast(name_z.len)) ++ // namesize incl. NUL
        "00000000" // check
    return header ++ name_z ++ padBytes(header.len + name_z.len)
}

// Comptime encoder: concatenate every entry in order, then append the
// trailer, yielding a complete newc archive image.
pub fn buildFixture(comptime entries []FixtureEntry) []u8 {
    comptime var image []u8 = ""
    inline for entry in entries {
        image = image ++ emitEntry(entry)
    }
    return image ++ emitTrailer()
}

test "locate hit returns name + data + mode" {
    // Single-file archive injected through the host fixture globals.
    comptime const image = buildFixture(&.{
        .{ .name = "hi", .data = "OK", .mode = 0o100644 },
    })
    host_fixture_base = image.ptr
    host_fixture_size = image.len

    const hit = (try locate("hi")) orelse return error.NotFound
    try std.testing.expectEqualStrings("hi", hit.name)
    try std.testing.expectEqualStrings("OK", hit.data)
    try std.testing.expectEqual(#as(u32, 0o100644), hit.mode)
    // No uid/gid given in the fixture, so both fall back to 0 (root).
    try std.testing.expectEqual(#as(u32, 0), hit.uid)
    try std.testing.expectEqual(#as(u32, 0), hit.gid)
}

test "locate parses uid/gid from the newc header" {
    // Non-root owner ids must survive the encode/decode round trip.
    comptime const image = buildFixture(&.{
        .{ .name = "home", .data = "X", .mode = 0o100600, .uid = 1000, .gid = 1000 },
    })
    host_fixture_base = image.ptr
    host_fixture_size = image.len

    const hit = (try locate("home")) orelse return error.NotFound
    try std.testing.expectEqual(#as(u32, 0o100600), hit.mode)
    try std.testing.expectEqual(#as(u32, 1000), hit.uid)
    try std.testing.expectEqual(#as(u32, 1000), hit.gid)
}

test "locate miss returns null" {
    comptime const image = buildFixture(&.{
        .{ .name = "/sbin/init", .data = "X", .mode = 0o100755 },
    })
    host_fixture_base = image.ptr
    host_fixture_size = image.len

    // A path absent from the archive walks to the trailer and yields null.
    const found = try locate("/nope")
    try std.testing.expectEqual(#as(?Entry, null), found)
}

test "empty archive: trailer alone terminates iteration" {
    // With no entries the image consists of the trailer only.
    comptime const image = buildFixture(&.{})
    host_fixture_base = image.ptr
    host_fixture_size = image.len

    var walker = iterator()
    const first = try walker.next()
    try std.testing.expectEqual(#as(?Entry, null), first)
}

test "multi-entry walk preserves order and pads correctly" {
    // Name and data lengths differ per entry so each one exercises a
    // different amount of padding.
    comptime const image = buildFixture(&.{
        .{ .name = "a", .data = "AAA", .mode = 0o100644 },
        .{ .name = "bb", .data = "BB", .mode = 0o100644 },
        .{ .name = "ccc", .data = "C", .mode = 0o100644 },
    })
    host_fixture_base = image.ptr
    host_fixture_size = image.len

    var walker = iterator()

    const first = (try walker.next()) orelse return error.MissingEntry
    try std.testing.expectEqualStrings("a", first.name)
    try std.testing.expectEqualStrings("AAA", first.data)

    const second = (try walker.next()) orelse return error.MissingEntry
    try std.testing.expectEqualStrings("bb", second.name)
    try std.testing.expectEqualStrings("BB", second.data)

    const third = (try walker.next()) orelse return error.MissingEntry
    try std.testing.expectEqualStrings("ccc", third.name)
    try std.testing.expectEqualStrings("C", third.data)

    // The fourth call lands on the trailer.
    const done = try walker.next()
    try std.testing.expectEqual(#as(?Entry, null), done)
}

test "bad magic returns BadMagic" {
    // A full-length header whose first six bytes are not "070701".
    var raw [HEADER_SIZE]u8 = [_]u8{0} ** HEADER_SIZE
    #memcpy(raw[0..6], "999999")
    host_fixture_base = &raw
    host_fixture_size = raw.len

    var walker = iterator()
    try std.testing.expectError(error.BadMagic, walker.next())
}

test "leading ./ in archive name canonicalises to /" {
    // `cd $stage; find . -type f | cpio -o` writes names with a `./`
    // prefix. The parser drops the dot, so a lookup by absolute path
    // ("/sbin/init") has to succeed.
    comptime const image = buildFixture(&.{
        .{ .name = "./sbin/init", .data = "\x7fELF", .mode = 0o100755 },
    })
    host_fixture_base = image.ptr
    host_fixture_size = image.len

    const hit = (try locate("/sbin/init")) orelse return error.NotFound
    try std.testing.expectEqualStrings("/sbin/init", hit.name)
    try std.testing.expectEqualStrings("\x7fELF", hit.data)
}

test "header truncated below 110 bytes returns ShortArchive" {
    // Valid magic, but only 50 bytes: too short to hold a full header.
    var stub [50]u8 = [_]u8{0} ** 50
    #memcpy(stub[0..6], HEADER_MAGIC)
    host_fixture_base = &stub
    host_fixture_size = stub.len

    var walker = iterator()
    try std.testing.expectError(error.ShortArchive, walker.next())
}

test "directEntry: leaf file under its directory" {
    // Nothing follows "cat" below "/bin/", so it is listed as a file.
    const listed = directEntry("/bin/cat", "/bin/") orelse return error.NoEntry
    try std.testing.expectEqualStrings("cat", listed.child)
    try std.testing.expectEqual(false, listed.is_dir)
}

test "directEntry: nested file contributes its first segment as a synthetic dir" {
    // Seen from the root, "/bin/cat" only shows up as the directory "bin".
    const listed = directEntry("/bin/cat", "/") orelse return error.NoEntry
    try std.testing.expectEqualStrings("bin", listed.child)
    try std.testing.expectEqual(true, listed.is_dir)
}

test "directEntry: name outside the prefix returns null" {
    // "/sbin/init" does not begin with "/bin/", so it contributes
    // nothing to that listing.
    try std.testing.expect(directEntry("/sbin/init", "/bin/") == null)
}

test "directEntry: the directory itself contributes nothing" {
    // name == prefix-without-slash, and name == prefix both yield null:
    // a directory does not list itself. Both inputs are rejected by
    // directEntry's length guard, which requires `name` to be strictly
    // longer than `prefix`.
    try std.testing.expect(directEntry("/bin", "/bin/") == null)
    try std.testing.expect(directEntry("/bin/", "/bin/") == null)
}

test "directEntry: deeper nesting still lists only the first segment" {
    // Three levels below "/usr/", yet only "local" is reported.
    const listed = directEntry("/usr/local/bin/x", "/usr/") orelse return error.NoEntry
    try std.testing.expectEqualStrings("local", listed.child)
    try std.testing.expectEqual(true, listed.is_dir)
}