Zig 181 lines
const std = @import("std");
const Io = std.Io;
// Deterministic newc cpio encoder.
//
// build.zig invokes it as
// build_initramfs <output.cpio> <stage_dir> <arc1>:<mode1> <arc2>:<mode2> ...
// where the <arcN> list is pre-sorted lexicographically by build.zig
// and <modeN> is the entry's octal newc mode (per-file modes so the
// VFS permission layer can distinguish /etc/shadow 0600 from the
// 0755 binaries). For each arc the encoder reads <stage_dir>/<arc>
// and emits one newc entry with name "./<arc>" (matches the cpio(1)
// `find . -type f` layout the initramfs.zig parser already
// canonicalises via its `./` strip). All headers fix mtime / uid /
// gid / nlink so the archive bytes are a pure function of file
// contents + name list + mode list, not host filesystem state.
//
// Replaces the addSystemCommand cpio(1) block in build.zig — bsdcpio
// stamps the host-clock mtime into c_mtime and gives every entry a
// fresh inode at byte 12, which drifted between two clean builds and
// blocked Pi-hash baseline refresh.
const MAGIC = "070701";
const HEADER_SIZE: usize = 110;
const READ_LIMIT: Io.Limit = .limited(1 << 24); // 16 MiB / file is plenty
pub fn main(init: std.process.Init) !void {
const io = init.io;
const gpa = init.gpa;
const arena = init.arena.allocator();
const args = try init.minimal.args.toSlice(arena);
if (args.len < 3) {
std.debug.panic(
"usage: build_initramfs <output.cpio> <stage_dir> <arc>...\n",
.{},
);
}
const out_path = args[1];
const stage_path = args[2];
const arcs = args[3..];
var stage = try Io.Dir.cwd().openDir(io, stage_path, .{});
defer stage.close(io);
var out_file = try Io.Dir.cwd().createFile(io, out_path, .{});
defer out_file.close(io);
var out_buf: [64 * 1024]u8 = undefined;
var out_writer = out_file.writer(io, &out_buf);
const w = &out_writer.interface;
var ino: u32 = 1;
for (arcs) |arc_spec| {
// Each token is "<arc>:<octal mode>" (build.zig formats it).
// Refusing a token without a mode keeps a stale invocation from
// silently flattening every entry back to one mode.
const colon = std.mem.lastIndexOfScalar(u8, arc_spec, ':') orelse
return error.MissingMode;
const arc = arc_spec[0..colon];
const mode = try std.fmt.parseInt(u32, arc_spec[colon + 1 ..], 8);
const data = try stage.readFileAlloc(io, arc, gpa, READ_LIMIT);
defer gpa.free(data);
try emitEntry(w, ino, arc, mode, data);
ino += 1;
}
try emitTrailer(w, ino);
try w.flush();
}
fn emitEntry(w: *Io.Writer, ino: u32, arc: []const u8, mode: u32, data: []const u8) !void {
// Name written into the archive matches cpio(1) `find . -type f`
// output ("./<arc>") so the initramfs.zig `./`-strip canonicaliser
// produces "/<arc>" for `locate("/sbin/init")` etc.
var name_buf: [512]u8 = undefined;
if (arc.len + 2 >= name_buf.len) return error.NameTooLong;
name_buf[0] = '.';
name_buf[1] = '/';
@memcpy(name_buf[2 .. 2 + arc.len], arc);
name_buf[2 + arc.len] = 0;
const name_with_nul = name_buf[0 .. 2 + arc.len + 1];
try writeHeader(w, .{
.ino = ino,
.mode = mode,
.filesize = @intCast(data.len),
.namesize = @intCast(name_with_nul.len),
});
try w.writeAll(name_with_nul);
try padTo4(w, HEADER_SIZE + name_with_nul.len);
try w.writeAll(data);
try padTo4(w, data.len);
}
fn emitTrailer(w: *Io.Writer, ino: u32) !void {
const name = "TRAILER!!!\x00";
try writeHeader(w, .{
.ino = ino,
.mode = 0,
.filesize = 0,
.namesize = @intCast(name.len),
});
try w.writeAll(name);
try padTo4(w, HEADER_SIZE + name.len);
}
const HeaderArgs = struct {
ino: u32,
mode: u32,
filesize: u32,
namesize: u32,
};
fn writeHeader(w: *Io.Writer, h: HeaderArgs) !void {
try w.writeAll(MAGIC);
try writeHex8(w, h.ino);
try writeHex8(w, h.mode);
try writeHex8(w, 0); // uid
try writeHex8(w, 0); // gid
try writeHex8(w, 1); // nlink — GNU cpio writes 1 on the trailer too
try writeHex8(w, 0); // mtime
try writeHex8(w, h.filesize);
try writeHex8(w, 0); // devmajor
try writeHex8(w, 0); // devminor
try writeHex8(w, 0); // rdevmajor
try writeHex8(w, 0); // rdevminor
try writeHex8(w, h.namesize);
try writeHex8(w, 0); // check
}
fn writeHex8(w: *Io.Writer, v: u32) !void {
var buf: [8]u8 = undefined;
const hex = "0123456789ABCDEF";
var i: usize = 8;
var x = v;
while (i > 0) {
i -= 1;
buf[i] = hex[x & 0xF];
x >>= 4;
}
try w.writeAll(&buf);
}
fn padTo4(w: *Io.Writer, n: usize) !void {
const pad = (4 - (n & 3)) & 3;
if (pad == 0) return;
try w.writeAll(("\x00\x00\x00")[0..pad]);
}
// ---- Host tests ----
//
// Pin the byte offsets the kernel parser (src/initramfs.zig) reads:
// mode at 14, uid at 22, gid at 30. A drift between this encoder and
// that parser is a silent permission bypass, so the offsets are
// asserted here against literal hex.
test "emitEntry stamps the per-file mode into the newc mode field" {
var buf: [512]u8 = undefined;
var w: Io.Writer = .fixed(&buf);
try emitEntry(&w, 1, "etc/shadow", 0o100600, "x");
const out = w.buffered();
try std.testing.expectEqualStrings("070701", out[0..6]);
// 0o100600 == 0x8180; newc fields are 8-digit uppercase hex.
try std.testing.expectEqualStrings("00008180", out[14..22]);
try std.testing.expectEqualStrings("00000000", out[22..30]); // uid root
try std.testing.expectEqualStrings("00000000", out[30..38]); // gid root
}
test "emitEntry gives two entries distinct modes" {
var buf: [1024]u8 = undefined;
var w: Io.Writer = .fixed(&buf);
try emitEntry(&w, 1, "bin/fsh", 0o100755, "\x7fELF");
const first_len = w.buffered().len;
try emitEntry(&w, 2, "etc/shadow", 0o100600, "s");
const out = w.buffered();
// 0o100755 == 0x81ED on the first header; 0x8180 on the second.
try std.testing.expectEqualStrings("000081ED", out[14..22]);
try std.testing.expectEqualStrings("00008180", out[first_len + 14 .. first_len + 22]);
}