// diff-corpus — the stage0-vs-stage1 differential harness.
//
// Runs two flashc binaries over every .flash file in the given directories
// and asserts byte-identical observable behaviour in three modes:
//
// * transpile (flashc FILE) — stdout, stderr, exit status
// * token stream (flashc --dump-tokens FILE) — stdout, stderr, exit status
// * formatting (flashc fmt COPY) — the rewritten file's bytes,
// stderr, exit status
//
// Files that the compiler rejects participate too: both binaries must then
// reject with the same diagnostics and the same exit status, so the corpus
// covers the error surface as well as the happy path. Any divergence is
// reported and the run exits non-zero. This byte-equality check is the
// license every hybrid module swap must renew.
//
// The stage1 binary renders diagnostics with a source caret by default;
// stage0 is frozen and prints the bare one-line form. Every stage1
// invocation therefore carries --plain-diagnostics, which restores the
// frozen bytes — so the comparison keeps pinning the diagnostic text itself.
//
// Usage: diff-corpus <stage0-flashc> <stage1-flashc> <dir> [dir ...]
const std = @import("std");
const Io = std.Io;
const tmp_root = ".zig-cache/diff-corpus";
const max_file_size: usize = 1 << 20;
// Entry point. Validates the command line, visits every corpus directory in
// the order given, compares each .flash file (sorted by name within its
// directory) under both binaries, and ends with a one-line summary on stdout.
// Terminates the process with status 1 when at least one mode diverged.
pub fn main(init: std.process.Init) !void {
    const io = init.io;
    const arena = init.arena.allocator();

    var report_buf: [4096]u8 = undefined;
    var report_writer = std.Io.File.stdout().writer(io, &report_buf);
    const out = &report_writer.interface;
    // Best-effort flush for every path that leaves main by returning.
    defer out.flush() catch {};

    const argv = try init.minimal.args.toSlice(arena);
    if (argv.len < 4) {
        try out.writeAll("usage: diff-corpus <stage0-flashc> <stage1-flashc> <dir> [dir ...]\n");
        return error.BadArguments;
    }
    const stage0 = argv[1];
    const stage1 = argv[2];
    const corpus_dirs = argv[3..];

    try Io.Dir.cwd().createDirPath(io, tmp_root);

    var total_mismatches: usize = 0;
    var total_files: usize = 0;
    for (corpus_dirs) |dir_path| {
        var dir = try Io.Dir.cwd().openDir(io, dir_path, .{ .iterate = true });
        defer dir.close(io);

        // Gather the .flash entries first and sort them, so the report order
        // does not depend on directory iteration order.
        var flash_names: std.ArrayList([]const u8) = .empty;
        var entries = dir.iterate();
        while (try entries.next(io)) |entry| {
            const is_flash_file = entry.kind == .file and
                std.mem.endsWith(u8, entry.name, ".flash");
            if (is_flash_file) {
                // The name is duplicated into the arena before it is stored.
                try flash_names.append(arena, try arena.dupe(u8, entry.name));
            }
        }
        std.mem.sort([]const u8, flash_names.items, {}, lessThanStr);

        for (flash_names.items) |name| {
            const path = try std.fs.path.join(arena, &.{ dir_path, name });
            total_files += 1;
            total_mismatches += try compareFile(arena, io, out, stage0, stage1, path);
        }
    }

    if (total_mismatches == 0) {
        try out.print("diff-corpus: {d} files x 3 modes, no differences\n", .{total_files});
        return;
    }

    try out.print("diff-corpus: {d} mismatch(es) across {d} files\n", .{ total_mismatches, total_files });
    // std.process.exit does not run defers, so flush explicitly first.
    try out.flush();
    std.process.exit(1);
}
// Sort predicate for std.mem.sort: true when `a` orders strictly before `b`
// in byte-wise lexicographic order. The context argument is unused.
fn lessThanStr(_: void, a: []const u8, b: []const u8) bool {
    const ordering = std.mem.order(u8, a, b);
    return ordering == .lt;
}
// One observed run: captured streams plus how the process ended.
// Filled in by runOnce from the result of std.process.run; two values are
// compared field by field in sameObserved.
const Observed = struct {
// Bytes the child wrote to standard output.
stdout: []u8,
// Bytes the child wrote to standard error.
stderr: []u8,
// Termination record reported for the child; compared with std.meta.eql.
term: std.process.Child.Term,
};
// Run `argv` through std.process.run and repackage the result as an Observed:
// the captured stdout and stderr plus the termination record. `arena` is the
// allocator handed to std.process.run. Any error from std.process.run is
// returned to the caller.
fn runOnce(arena: std.mem.Allocator, io: Io, argv: []const []const u8) !Observed {
    const result = try std.process.run(arena, io, .{ .argv = argv });
    const observed: Observed = .{
        .term = result.term,
        .stdout = result.stdout,
        .stderr = result.stderr,
    };
    return observed;
}
// True when two runs are indistinguishable: equal termination record, then
// byte-identical stdout, then byte-identical stderr. Checks stop at the first
// difference.
fn sameObserved(a: Observed, b: Observed) bool {
    if (!std.meta.eql(a.term, b.term)) return false;
    if (!std.mem.eql(u8, a.stdout, b.stdout)) return false;
    return std.mem.eql(u8, a.stderr, b.stderr);
}
// Compare one source file across the three modes. Returns the number of
// modes (0–3) in which the two binaries diverged.
//
//   arena   — allocator for argv lists, captured output and file contents;
//             nothing is freed individually here.
//   out     — report writer; one line is printed per diverging mode.
//   stage0  — path of the reference flashc binary.
//   stage1  — path of the flashc binary under test; always invoked with
//             --plain-diagnostics.
//   path    — the .flash source file fed to both binaries.
//
// Errors from spawning a binary, from file I/O, or from writing the report
// are returned to the caller.
fn compareFile(
    arena: std.mem.Allocator,
    io: Io,
    out: *Io.Writer,
    stage0: []const u8,
    stage1: []const u8,
    path: []const u8,
) !usize {
    var bad: usize = 0;

    // The two read-only modes share a shape: same argv tail, two binaries.
    const direct_modes = [_]struct { tag: []const u8, flag: ?[]const u8 }{
        .{ .tag = "transpile", .flag = null },
        .{ .tag = "tokens", .flag = "--dump-tokens" },
    };
    for (direct_modes) |mode| {
        var argv: std.ArrayList([]const u8) = .empty;
        try argv.append(arena, stage0);
        if (mode.flag) |f| try argv.append(arena, f);
        try argv.append(arena, path);
        const a = try runOnce(arena, io, argv.items);

        var argv1: std.ArrayList([]const u8) = .empty;
        try argv1.append(arena, stage1);
        try argv1.append(arena, "--plain-diagnostics");
        if (mode.flag) |f| try argv1.append(arena, f);
        try argv1.append(arena, path);
        const b = try runOnce(arena, io, argv1.items);

        if (!sameObserved(a, b)) {
            bad += 1;
            try out.print("diff-corpus: {s}: {s} differs\n", .{ path, mode.tag });
        }
    }

    // fmt rewrites in place, so each binary formats a pristine copy and the
    // comparison is over the resulting bytes.
    //
    // Both binaries are handed the SAME working path, one after the other.
    // stderr is compared byte for byte, so a diagnostic that echoes the file
    // name would otherwise differ between the two runs purely because the
    // copies were named differently.
    const src = try Io.Dir.cwd().readFileAlloc(io, path, arena, .limited(max_file_size));

    // Flatten the source path into a single file name under tmp_root.
    const flat = try arena.dupe(u8, path);
    for (flat) |*c| {
        if (c.* == '/' or c.* == '\\') c.* = '_';
    }
    const work = try std.fmt.allocPrint(arena, "{s}/fmt_{s}", .{ tmp_root, flat });
    const copy0 = try std.fmt.allocPrint(arena, "{s}/s0_{s}", .{ tmp_root, flat });
    const copy1 = try std.fmt.allocPrint(arena, "{s}/s1_{s}", .{ tmp_root, flat });

    // stage0: pristine bytes in, formatted bytes out.
    try Io.Dir.cwd().writeFile(io, .{ .sub_path = work, .data = src });
    const f0 = try runOnce(arena, io, &.{ stage0, "fmt", work });
    const out0 = try Io.Dir.cwd().readFileAlloc(io, work, arena, .limited(max_file_size));

    // stage1: reset the working file to the pristine bytes before the run.
    try Io.Dir.cwd().writeFile(io, .{ .sub_path = work, .data = src });
    const f1 = try runOnce(arena, io, &.{ stage1, "--plain-diagnostics", "fmt", work });
    const out1 = try Io.Dir.cwd().readFileAlloc(io, work, arena, .limited(max_file_size));

    // Keep each binary's result under its own name so a reported divergence
    // can be inspected afterwards.
    try Io.Dir.cwd().writeFile(io, .{ .sub_path = copy0, .data = out0 });
    try Io.Dir.cwd().writeFile(io, .{ .sub_path = copy1, .data = out1 });

    // stdout is deliberately not part of the fmt comparison: the observable
    // result of this mode is the rewritten file, stderr and the exit status.
    const fmt_same = std.meta.eql(f0.term, f1.term) and
        std.mem.eql(u8, f0.stderr, f1.stderr) and
        std.mem.eql(u8, out0, out1);
    if (!fmt_same) {
        bad += 1;
        try out.print("diff-corpus: {s}: fmt differs\n", .{path});
    }
    return bad;
}