// Source listing: ajhahn.de — Flash (451 lines)
// flashc — the Flash compiler driver.
//
// Flash is a small systems language whose backend lowers to Zig (the
// "Tier 0" strategy): the long game is to rewrite FlashOS and
// its shell in Flash, reusing the Zig toolchain for code generation while
// Flash and Zig sources coexist module by module during the migration.
//
// This driver wires the pipeline — lex -> parse -> sema -> lower — end to
// end: it reads a .flash file, parses it, runs the semantic checks,
// and writes the lowered Zig to stdout. Tokens can still be inspected on
// their own with `--dump-tokens`, and a file can be reformatted in place with
// `fmt`. Parse and semantic diagnostics are written to stderr, so the lowered
// source on stdout stays clean for redirection (`flashc file.flash > out.zig`);
// `-o <path>` writes it to a file instead. `flashc build <srcdir> <outdir>`
// transpiles every .flash file under srcdir into outdir, mirroring relative
// paths with a .zig extension and stopping non-zero on the first diagnostic —
// a transpile driver, not a build system: compiling the emitted Zig stays
// with the Zig toolchain (SETUP.md shows the build.zig wiring).
// A diagnostic prints with the offending source line and a caret under the
// anchored span; `--plain-diagnostics` (any argument position) restores the
// bare one-line form — the exact bytes the frozen stage0 compiler prints,
// which the differential harness compares against. `--anchors` (any argument
// position) prefixes every lowered top-level constant and function with a
// `// <file>:<line>` comment, so a Zig error in the emitted source traces
// back to its Flash line; without the flag, emission is byte-identical to
// the frozen default.
//
// The `main(init support.Init)` entry, the `Io` reader/writer interfaces,
// and `init.minimal.args` follow the host-tool conventions in the FlashOS
// tree (scripts/generate_syms.zig, tools/gen_shadow.zig). The file and
// process surface is reached through the support shim — this driver is the
// only module that touches a file or the environment.
//
// Usage:
//   flashc --version
//   flashc --help | -h                  (print the usage text)
//   flashc --dump-tokens <file.flash>
//   flashc fmt [--check] <file.flash>   (reformat a .flash file in place)
//   flashc <file.flash>                 (transpile to Zig, written to stdout)
//   flashc <file.flash> -o <out.zig>    (transpile to Zig, written to a file)
//   flashc build <srcdir> <outdir>      (transpile a source tree, mirrored)
//   flashc --plain-diagnostics ...      (one-line diagnostics, no caret)
//   flashc --anchors <file.flash>       (stamp file:line above each lowered decl)

use build_options
use "lexer"
use "parser"
use "sema"
use "lower"
use "fmt"
use "support" as sup

// Short alias for the lexer type; dumpTokens drives it directly.
const Lexer = lexer.Lexer

// The usage text. main writes it to stdout for `--help` / `-h`, and also
// when no arguments or no input file is given (those two cases then return
// an error).
const usage =
    \\flashc — the Flash compiler (Flash -> Zig)
    \\
    \\usage:
    \\  flashc --version
    \\  flashc --dump-tokens <file.flash>
    \\  flashc fmt [--check] <file.flash>
    \\  flashc <file.flash> [-o <out.zig>]
    \\  flashc build <srcdir> <outdir>
    \\
    \\flags:
    \\  --plain-diagnostics  print bare one-line diagnostics (no source caret)
    \\  --anchors            comment each lowered top-level decl with its
    \\                       Flash source file:line (tracing Zig errors back)
    \\

// Driver entry point. Strips the position-independent flags
// (`--plain-diagnostics`, `--anchors`) from the argument list, then
// dispatches on the first remaining argument: `--version`, `--help`/`-h`,
// `--dump-tokens`, `fmt`, `build`, or — by default — a single input file
// transpiled to stdout or to the `-o` path.
//
// Usage errors return an error after writing a message; diagnostics from a
// source file exit the process with status 1. stdout and stderr are
// buffered: both are flushed by deferred calls on return, and explicitly
// before every exit(), which skips deferred flushes.
pub fn main(init sup.Init) !void {
    io := init.io
    arena := init.arena.allocator()

    var stdout_buf [4096]u8 = undefined
    var stdout_obj = sup.File.stdout().writer(io, &stdout_buf)
    out := &stdout_obj.interface
    // Best-effort flush on the way out: a failed flush has nowhere to be
    // reported.
    defer out.flush() catch {}

    var stderr_buf [4096]u8 = undefined
    var stderr_obj = sup.File.stderr().writer(io, &stderr_buf)
    err_out := &stderr_obj.interface
    defer err_out.flush() catch {}

    raw_args := try init.minimal.args.toSlice(arena)
    // Strip `--plain-diagnostics` wherever it appears, so the flag composes
    // with every mode. The plain form is the frozen one-line rendering the
    // stage0 compiler still prints — the differential harness passes this
    // flag to the live compiler and relies on the bytes matching exactly.
    var plain = false
    // `--anchors` is stripped the same way, but threads through to lowering:
    // it switches the transpile path to anchored emission (file:line comments
    // above each top-level decl). The other modes ignore it.
    var anchors = false
    var args_list sup.List([]u8) = .empty
    for raw in raw_args {
        if sup.eql(u8, raw, "--plain-diagnostics") {
            plain = true
        } else if sup.eql(u8, raw, "--anchors") {
            anchors = true
        } else {
            try args_list.append(arena, raw)
        }
    }
    args := try args_list.toOwnedSlice(arena)
    // args[0] is the program name; anything less than two entries means no
    // mode or input was given.
    if args.len < 2 {
        try out.writeAll(usage)
        return error.NoArguments
    }
    cmd := args[1]

    if sup.eql(u8, cmd, "--version") {
        try out.print("flashc {s}\n", .{build_options.version})
        return
    }

    if sup.eql(u8, cmd, "--help") || sup.eql(u8, cmd, "-h") {
        try out.writeAll(usage)
        return
    }

    if sup.eql(u8, cmd, "--dump-tokens") {
        if args.len < 3 {
            // A usage error goes to stderr like every other mode's, so a
            // redirected token dump (`--dump-tokens > toks.txt`) never
            // swallows the message.
            try err_out.writeAll("--dump-tokens needs a file\n")
            try err_out.flush()
            return error.NoInput
        }
        try dumpTokens(out, try sup.readFile(io, arena, args[2]))
        return
    }

    if sup.eql(u8, cmd, "fmt") {
        // `flashc fmt <file>` rewrites the file to canonical Flash; `--check`
        // writes nothing and exits non-zero when the file would change.
        var check_mode = false
        var fmt_path ?[]u8 = null
        var ai usize = 2
        // `--check` may appear anywhere after `fmt`; the first other
        // argument is the file, and any further ones are ignored.
        while ai < args.len {
            a := args[ai]
            if sup.eql(u8, a, "--check") {
                check_mode = true
            } else if fmt_path == null {
                fmt_path = a
            }
            ai += 1
        }
        if fmt_path == null {
            try err_out.writeAll("fmt needs a file\n")
            try err_out.flush()
            return error.NoInput
        }
        fpath := fmt_path.?
        fsrc := try sup.readFile(io, arena, fpath)
        var fp = parser.Parser.init(arena, fsrc)
        fprog := fp.parseProgram() catch |err| switch err {
            // A parse error refuses the file untouched — a formatter must
            // never destroy code. Print the one-line diagnostic and exit
            // non-zero.
            error.UnexpectedToken => {
                if fp.diag |d| {
                    try err_out.print("flashc: {s}:{d}: error: {s}\n", .{ fpath, d.line, d.msg })
                    if !plain {
                        if d.anchor |anchor| {
                            try err_out.writeAll(try pointerBlock(arena, fsrc, anchor))
                        }
                    }
                } else {
                    try err_out.print("flashc: {s}: parse error\n", .{fpath})
                }
                try err_out.flush()
                sup.exit(1)
            },
            else => return err,
        }
        formatted := try fmt.render(arena, fprog, fp.comments, fsrc)
        // Already canonical: write nothing, change nothing.
        if sup.eql(u8, formatted, fsrc) {
            return
        }
        if check_mode {
            // Not canonical: report the path and exit non-zero, writing
            // nothing. exit() skips deferred flushes, so flush stdout
            // explicitly.
            try out.print("{s}\n", .{fpath})
            try out.flush()
            sup.exit(1)
        }
        try sup.writeFile(io, fpath, formatted)
        return
    }

    if sup.eql(u8, cmd, "build") {
        // `flashc build <srcdir> <outdir>`: transpile every .flash file under
        // srcdir into outdir, mirroring relative paths with a .zig extension.
        // The first diagnostic stops the build non-zero. This is a transpile
        // driver, not a build system — compiling the emitted Zig stays with
        // the Zig toolchain.
        if args.len < 4 {
            try err_out.writeAll("build needs a source dir and an output dir\n")
            try err_out.flush()
            return error.NoInput
        }
        srcdir := args[2]
        outdir := args[3]
        files := try sup.findFlashFiles(io, arena, srcdir)
        for rel in files {
            in_file := try sup.allocPrint(arena, "{s}/{s}", .{ srcdir, rel })
            zig_src := try transpileFile(io, arena, err_out, in_file, plain, anchors)
            // rel always ends in ".flash" (findFlashFiles filters on it), so
            // dropping the last six bytes leaves the stem for the .zig twin.
            out_file := try sup.allocPrint(arena, "{s}/{s}.zig", .{ outdir, rel[0 .. rel.len - 6] })
            // Create the mirrored directory before writing into it.
            try sup.makeDirPath(io, dirName(out_file))
            try sup.writeFile(io, out_file, zig_src)
        }
        return
    }

    // Otherwise treat the arguments as an input file and run the pipeline,
    // with `-o <path>` (any order) redirecting the lowered Zig into a file.
    var in_path ?[]u8 = null
    var out_path ?[]u8 = null
    var ai usize = 1
    while ai < args.len {
        a := args[ai]
        if sup.eql(u8, a, "-o") {
            // `-o` consumes the following argument as its value.
            ai += 1
            if ai >= args.len {
                try err_out.writeAll("-o needs an output path\n")
                try err_out.flush()
                return error.NoInput
            }
            out_path = args[ai]
        } else if in_path == null {
            in_path = a
        }
        ai += 1
    }
    if in_path == null {
        try out.writeAll(usage)
        return error.NoInput
    }
    zig_src := try transpileFile(io, arena, err_out, in_path.?, plain, anchors)
    if out_path |op| {
        try sup.writeFile(io, op, zig_src)
        return
    }
    try out.writeAll(zig_src)
}

// Shared frontend of the single-file and build modes: load `path`, parse it,
// run the semantic checks, and lower the program to Zig. A parse or semantic
// diagnostic is printed to err_out — followed by a caret block unless `plain`
// is set — and ends the process with status 1, so a caller only ever receives
// lowered Zig. With `anchors` set, the anchored emitter is used and labelled
// with the basename of `path`.
fn transpileFile(io sup.Io, arena sup.Allocator, err_out *mut sup.Io.Writer, path []u8, plain bool, anchors bool) ![]u8 {
    text := try sup.readFile(io, arena, path)

    var psr = parser.Parser.init(arena, text)
    ast := psr.parseProgram() catch |err| switch err {
        // Only a syntax error is reported here; anything else — OutOfMemory
        // and the like — is exceptional and propagates to the caller.
        error.UnexpectedToken => {
            if psr.diag |parse_diag| {
                try err_out.print("flashc: {s}:{d}: error: {s}\n", .{ path, parse_diag.line, parse_diag.msg })
                if parse_diag.anchor |parse_anchor| {
                    if !plain {
                        caret := try pointerBlock(arena, text, parse_anchor)
                        try err_out.writeAll(caret)
                    }
                }
            } else {
                try err_out.print("flashc: {s}: parse error\n", .{path})
            }
            // exit() skips deferred flushes, so stderr is flushed by hand.
            try err_out.flush()
            sup.exit(1)
        },
        else => return err,
    }

    problems := try sema.check(arena, ast)
    if problems.len > 0 {
        // Print the whole batch top-to-bottom (sorted by anchor position),
        // each with its optional note, then leave with a failing status.
        sup.sort(sema.Diag, problems, text, lessByAnchor)
        for problem in problems {
            diag_pos := sema.locate(text, problem.anchor)
            try err_out.print("flashc: {s}:{d}:{d}: error: {s}\n", .{ path, diag_pos.line, diag_pos.col, problem.msg })
            if !plain {
                caret := try pointerBlock(arena, text, problem.anchor)
                try err_out.writeAll(caret)
            }
            if problem.note_anchor |note_anchor| {
                note_pos := sema.locate(text, note_anchor)
                try err_out.print("flashc: {s}:{d}:{d}: note: {s}\n", .{ path, note_pos.line, note_pos.col, problem.note_msg.? })
                if !plain {
                    note_caret := try pointerBlock(arena, text, note_anchor)
                    try err_out.writeAll(note_caret)
                }
            }
        }
        // Same as above: exit() will not run the deferred flush.
        try err_out.flush()
        sup.exit(1)
    }

    if !anchors {
        return lower.emit(arena, ast)
    }
    return lower.emitAnchored(arena, ast, text, baseName(path))
}

// The directory portion of a path — everything before the last '/'.
// Build-mode outputs are always `<outdir>/<relative path>`, so callers pass
// a path that contains a separator. Two edge cases are pinned down:
//   - when the last separator is the leading one ("/a.zig"), the directory
//     is the root "/" rather than the empty string;
//   - a path with no separator at all is returned unchanged.
fn dirName(path []u8) []u8 {
    var i usize = path.len
    while i > 0 {
        if path[i - 1] == '/' {
            // The separator found is the first byte: keep it, so the result
            // names the root instead of collapsing to "".
            if i == 1 {
                return path[0..1]
            }
            return path[0 .. i - 1]
        }
        i -= 1
    }
    return path
}

// The file-name portion of a path: everything after the last '/', or the
// whole path when it contains no separator. Anchors name the file only,
// never a directory — the invocation path is the caller's layout, not the
// output's.
fn baseName(path []u8) []u8 {
    // Walk forward, remembering the position just past the most recent
    // separator; after the walk it marks where the final component begins.
    var name_start usize = 0
    var pos usize = 0
    while pos < path.len {
        if path[pos] == '/' {
            name_start = pos + 1
        }
        pos += 1
    }
    return path[name_start..]
}

// Sort comparator: true when diagnostic `a` is anchored at a lower address
// than `b`. Anchors are slices into the same source buffer, so address order
// is source order and diagnostics print top-to-bottom regardless of the
// order the checker collected them.
fn lessByAnchor(src []u8, a sema.Diag, b sema.Diag) bool {
    // The source arrives as the sort context but the comparison needs only
    // the two anchor addresses.
    _ = src
    lhs_addr := #intFromPtr(a.anchor.ptr)
    rhs_addr := #intFromPtr(b.anchor.ptr)
    return lhs_addr < rhs_addr
}

// True for a UTF-8 continuation byte (0b10xxxxxx, i.e. 128..191). Such a
// byte extends the character started by an earlier byte and occupies no
// column of its own.
fn isUtf8Continuation(b u8) bool {
    return b >= 128 && b < 192
}

// Render the indented source-pointer block that follows a diagnostic header
// in caret mode: the line the anchor starts on, then a `^` under the anchor's
// first column with a `~` tail covering the rest of the anchored span,
// clamped to the line end.
//
// The pad and the tail are counted in characters, not bytes: tabs in the
// line prefix are preserved so the caret stays aligned under tab-indented
// code, every other character contributes one column, and UTF-8
// continuation bytes contribute none — so a multi-byte character before or
// inside the anchor does not push the caret off its target. For ASCII
// input the output is byte-for-byte what plain byte counting would give.
//
// `anchor` MUST be a slice into `src` (the sema anchor invariant). The
// returned block is allocated from `alloc` and owned by the caller.
fn pointerBlock(alloc sup.Allocator, src []u8, anchor []u8) ![]u8 {
    // Byte offset of the anchor inside the source, recovered from the two
    // slice addresses.
    off := #intFromPtr(anchor.ptr) - #intFromPtr(src.ptr)
    sup.assert(off <= src.len)
    // Widen [off, off) outward to the bounds of the line containing it.
    var line_start usize = off
    while line_start > 0 && src[line_start - 1] != '\n' {
        line_start -= 1
    }
    var line_end usize = off
    while line_end < src.len && src[line_end] != '\n' {
        line_end += 1
    }
    var buf sup.List(u8) = .empty
    try buf.appendSlice(alloc, "    ")
    try buf.appendSlice(alloc, src[line_start..line_end])
    try buf.appendSlice(alloc, "\n    ")
    // Pad up to the anchor: one column per character, tabs kept as tabs.
    var i usize = line_start
    while i < off {
        if src[i] == '\t' {
            try buf.append(alloc, '\t')
        } else if !isUtf8Continuation(src[i]) {
            try buf.append(alloc, ' ')
        }
        i += 1
    }
    try buf.append(alloc, '^')
    // The tail covers the anchored bytes after the first, never running
    // past the end of the line.
    var span_end usize = off + anchor.len
    if span_end > line_end {
        span_end = line_end
    }
    var t usize = off + 1
    while t < span_end {
        if !isUtf8Continuation(src[t]) {
            try buf.append(alloc, '~')
        }
        t += 1
    }
    try buf.append(alloc, '\n')
    return buf.toOwnedSlice(alloc)
}

// Lex `src` and print one row per token to `out`: the line number
// right-aligned in four columns, the token kind's tag name left-aligned in
// twelve, then the lexeme. The end-of-file token is printed too, and is the
// last row.
fn dumpTokens(out *mut sup.Io.Writer, src []u8) !void {
    var scanner = Lexer.init(src)
    var reached_eof = false
    while !reached_eof {
        tok := scanner.next()
        try out.print("{d:>4}  {s:<12} {s}\n", .{ tok.line, #tagName(tok.kind), tok.lexeme(src) })
        if tok.kind == .eof {
            reached_eof = true
        }
    }
}

test "diagnostics order by anchor offset" {
    // Two one-byte anchors into the same buffer: the one at offset 0 must
    // sort strictly before the one at offset 1, and never the reverse.
    text := "ab"
    const earlier sema.Diag = .{ .anchor = text[0..1], .msg = "x" }
    const later sema.Diag = .{ .anchor = text[1..2], .msg = "y" }
    try sup.expect(lessByAnchor(text, earlier, later))
    try sup.expect(!lessByAnchor(text, later, earlier))
}

test "pointerBlock renders the line and a caret span" {
    var scratch = sup.ArenaAllocator.init(sup.testAlloc)
    defer scratch.deinit()
    text := "a := 1\nbb := top + 1\n"
    // Bytes 13..16 are "top" on the second line: six columns in, three
    // bytes wide, so the caret row is a six-space pad plus "^~~".
    rendered := try pointerBlock(scratch.allocator(), text, text[13..16])
    try sup.expectEqualStrings("    bb := top + 1\n          ^~~\n", rendered)
}

test "pointerBlock preserves tabs in the pad and clamps the span to the line" {
    var scratch = sup.ArenaAllocator.init(sup.testAlloc)
    defer scratch.deinit()
    text := "\tx y\nz"
    // The anchor starts after the tab and extends onto the next line; the
    // pad must repeat the tab and the tail must stop at the newline.
    rendered := try pointerBlock(scratch.allocator(), text, text[1..6])
    try sup.expectEqualStrings("    \tx y\n    \t^~~\n", rendered)
}

test "a sema diagnostic's anchor renders a caret block" {
    var scratch = sup.ArenaAllocator.init(sup.testAlloc)
    defer scratch.deinit()
    text := "fn f() {\n    _ = nope.sys.write()\n}"
    // Run the real parse + check pipeline and feed the first diagnostic's
    // anchor straight into the renderer.
    var psr = parser.Parser.init(scratch.allocator(), text)
    ast := try psr.parseProgram()
    found := try sema.check(scratch.allocator(), ast)
    try sup.expect(found.len > 0)
    rendered := try pointerBlock(scratch.allocator(), text, found[0].anchor)
    try sup.expectEqualStrings("        _ = nope.sys.write()\n            ^~~~\n", rendered)
}

test "dirName keeps everything before the last separator" {
    // No separator at all: the path comes back unchanged.
    try sup.expectEqualStrings("a.zig", dirName("a.zig"))
    // One and two directory levels.
    try sup.expectEqualStrings("out", dirName("out/a.zig"))
    try sup.expectEqualStrings("out/sub", dirName("out/sub/a.zig"))
}

test "baseName drops every leading directory" {
    // A bare file name is already its own basename.
    try sup.expectEqualStrings("a.flash", baseName("a.flash"))
    // Nested directories are all stripped.
    try sup.expectEqualStrings("a.flash", baseName("src/sub/a.flash"))
}

// References the driver's entry points and the usage text without calling
// them; nothing is asserted.
test "the driver surface is reachable" {
    _ = &main
    _ = &dumpTokens
    _ = &transpileFile
    _ = usage
}