// ajhahn.de — Flash — Zig, 161 lines
// diff-corpus — the stage0-vs-stage1 differential harness.
//
// Runs two flashc binaries over every .flash file in the given directories
// and asserts byte-identical observable behaviour in three modes:
//
//   * transpile      (flashc FILE)               — stdout, stderr, exit status
//   * token stream   (flashc --dump-tokens FILE) — stdout, stderr, exit status
//   * formatting     (flashc fmt COPY)           — the rewritten file's bytes,
//                                                  stderr, exit status
//
// Files that the compiler rejects participate too: both binaries must then
// reject with the same diagnostics and the same exit status, so the corpus
// covers the error surface as well as the happy path. Any divergence is
// reported and the run exits non-zero. This byte-equality check is the
// license every hybrid module swap must renew.
//
// The stage1 binary renders diagnostics with a source caret by default;
// stage0 is frozen and prints the bare one-line form. Every stage1
// invocation therefore carries --plain-diagnostics, which restores the
// frozen bytes — so the comparison keeps pinning the diagnostic text itself.
//
// Usage: diff-corpus <stage0-flashc> <stage1-flashc> <dir> [dir ...]

const std = @import("std");
const Io = std.Io;

const tmp_root = ".zig-cache/diff-corpus";
const max_file_size: usize = 1 << 20;

// Entry point. Expects
//
//   diff-corpus <stage0-flashc> <stage1-flashc> <dir> [dir ...]
//
// For each directory (not recursed into), every regular-file entry whose
// name ends in ".flash" is handed to compareFile, in sorted name order.
// A one-line summary is printed at the end. The process exits with status 1
// when at least one mode diverged, and main returns error.BadArguments when
// fewer than three operands are supplied.
pub fn main(init: std.process.Init) !void {
    const io = init.io;
    const arena = init.arena.allocator();

    var out_storage: [4096]u8 = undefined;
    var file_writer = std.Io.File.stdout().writer(io, &out_storage);
    const out = &file_writer.interface;
    // Best-effort flush on every return path, error returns included.
    defer out.flush() catch {};

    const args = try init.minimal.args.toSlice(arena);
    if (args.len < 4) {
        try out.writeAll("usage: diff-corpus <stage0-flashc> <stage1-flashc> <dir> [dir ...]\n");
        return error.BadArguments;
    }
    const stage0 = args[1];
    const stage1 = args[2];
    const corpus_dirs = args[3..];

    // Scratch directory that compareFile writes its fmt copies into.
    try Io.Dir.cwd().createDirPath(io, tmp_root);

    var total_files: usize = 0;
    var total_mismatches: usize = 0;
    for (corpus_dirs) |dir_path| {
        var dir = try Io.Dir.cwd().openDir(io, dir_path, .{ .iterate = true });
        defer dir.close(io);

        // Gather the names first and sort them: directory iteration order
        // is not something the report should depend on.
        var flash_names: std.ArrayList([]const u8) = .empty;
        var walker = dir.iterate();
        while (try walker.next(io)) |entry| {
            const wanted = entry.kind == .file and
                std.mem.endsWith(u8, entry.name, ".flash");
            if (!wanted) continue;
            // Keep an arena-owned copy; presumably entry.name is only
            // valid until the iterator advances.
            const owned_name = try arena.dupe(u8, entry.name);
            try flash_names.append(arena, owned_name);
        }
        std.mem.sort([]const u8, flash_names.items, {}, lessThanStr);

        for (flash_names.items) |name| {
            const path = try std.fs.path.join(arena, &.{ dir_path, name });
            total_files += 1;
            total_mismatches += try compareFile(arena, io, out, stage0, stage1, path);
        }
    }

    if (total_mismatches == 0) {
        try out.print("diff-corpus: {d} files x 3 modes, no differences\n", .{total_files});
        return;
    }

    try out.print("diff-corpus: {d} mismatch(es) across {d} files\n", .{ total_mismatches, total_files });
    // exit does not return, so the deferred flush above would never run.
    try out.flush();
    std.process.exit(1);
}

// Sort predicate for std.mem.sort: true when `a` orders strictly before `b`
// in byte-wise lexicographic order. The context argument is unused.
fn lessThanStr(_: void, a: []const u8, b: []const u8) bool {
    const ordering = std.mem.order(u8, a, b);
    return ordering == .lt;
}

// One observed run of a child process: its captured output streams plus
// how the process ended. Filled in by runOnce from the result of
// std.process.run.
const Observed = struct {
    // Bytes captured from the child's stdout.
    stdout: []u8,
    // Bytes captured from the child's stderr.
    stderr: []u8,
    // Termination record of the child; two runs are compared on it with
    // std.meta.eql.
    term: std.process.Child.Term,
};

// Spawns `argv` via std.process.run, waits for it, and repackages the
// captured stdout, stderr and termination record as an Observed. The
// captured buffers are allocated from `arena`. Any error from
// std.process.run is returned unchanged.
fn runOnce(arena: std.mem.Allocator, io: Io, argv: []const []const u8) !Observed {
    const result = try std.process.run(arena, io, .{ .argv = argv });
    return Observed{
        .stdout = result.stdout,
        .stderr = result.stderr,
        .term = result.term,
    };
}

// True when two runs are indistinguishable: equal termination records and
// byte-identical stdout and stderr.
fn sameObserved(a: Observed, b: Observed) bool {
    if (!std.meta.eql(a.term, b.term)) return false;
    if (!std.mem.eql(u8, a.stdout, b.stdout)) return false;
    return std.mem.eql(u8, a.stderr, b.stderr);
}

// Runs both binaries over `path` in the three modes (transpile, token dump,
// fmt) and prints one "differs" line to `out` for each mode that diverges.
//
// stage1 is always given --plain-diagnostics as its first argument; stage0
// never is. For fmt, each binary rewrites its own copy of the source under
// tmp_root, and the comparison covers the rewritten bytes, stderr and the
// termination record; fmt's stdout is not compared.
//
// Returns how many modes diverged (0 to 3). Failures to spawn a binary, to
// read or write files, to allocate, or to write to `out` are propagated.
fn compareFile(
    arena: std.mem.Allocator,
    io: Io,
    out: *Io.Writer,
    stage0: []const u8,
    stage1: []const u8,
    path: []const u8,
) !usize {
    var diverged: usize = 0;

    // Modes that only read the source: the two invocations differ in the
    // binary and in stage1's extra leading flag, nothing else.
    const ReadOnlyMode = struct { tag: []const u8, flag: ?[]const u8 };
    const read_only_modes = [_]ReadOnlyMode{
        .{ .tag = "transpile", .flag = null },
        .{ .tag = "tokens", .flag = "--dump-tokens" },
    };
    for (read_only_modes) |mode| {
        var frozen_argv: std.ArrayList([]const u8) = .empty;
        try frozen_argv.append(arena, stage0);
        if (mode.flag) |flag| try frozen_argv.append(arena, flag);
        try frozen_argv.append(arena, path);
        const frozen = try runOnce(arena, io, frozen_argv.items);

        var candidate_argv: std.ArrayList([]const u8) = .empty;
        try candidate_argv.append(arena, stage1);
        try candidate_argv.append(arena, "--plain-diagnostics");
        if (mode.flag) |flag| try candidate_argv.append(arena, flag);
        try candidate_argv.append(arena, path);
        const candidate = try runOnce(arena, io, candidate_argv.items);

        if (sameObserved(frozen, candidate)) continue;
        diverged += 1;
        try out.print("diff-corpus: {s}: {s} differs\n", .{ path, mode.tag });
    }

    // fmt mode. The formatter rewrites its argument in place, so the source
    // is copied twice and each binary gets its own copy to rewrite.
    const cwd = Io.Dir.cwd();
    const pristine = try cwd.readFileAlloc(io, path, arena, .limited(max_file_size));

    // Turn the path into a single file name by replacing both kinds of
    // path separator with '_'.
    const flat_name = try arena.dupe(u8, path);
    for (flat_name) |*byte| {
        switch (byte.*) {
            '/', '\\' => byte.* = '_',
            else => {},
        }
    }

    const copy0 = try std.fmt.allocPrint(arena, "{s}/s0_{s}", .{ tmp_root, flat_name });
    const copy1 = try std.fmt.allocPrint(arena, "{s}/s1_{s}", .{ tmp_root, flat_name });
    try cwd.writeFile(io, .{ .sub_path = copy0, .data = pristine });
    try cwd.writeFile(io, .{ .sub_path = copy1, .data = pristine });

    const fmt0 = try runOnce(arena, io, &.{ stage0, "fmt", copy0 });
    const fmt1 = try runOnce(arena, io, &.{ stage1, "--plain-diagnostics", "fmt", copy1 });

    // Read back whatever each formatter left on disk.
    const bytes0 = try cwd.readFileAlloc(io, copy0, arena, .limited(max_file_size));
    const bytes1 = try cwd.readFileAlloc(io, copy1, arena, .limited(max_file_size));

    const fmt_differs = !std.meta.eql(fmt0.term, fmt1.term) or
        !std.mem.eql(u8, fmt0.stderr, fmt1.stderr) or
        !std.mem.eql(u8, bytes0, bytes1);
    if (fmt_differs) {
        diverged += 1;
        try out.print("diff-corpus: {s}: fmt differs\n", .{path});
    }

    return diverged;
}