Zig 2141 lines
// Flash formatter — AST back to canonical Flash source text.
//
// `flashc fmt` is gofmt / zig fmt for Flash: it parses a `.flash` file and
// re-emits it in one canonical layout. This module is the renderer — the
// inverse of lower.zig. Where lowering walks the AST to *Zig* text, the
// formatter walks the same AST to *Flash* text, so the two are mirror images
// and the emitter inventory lines up one-to-one (emitType / emitExpr /
// emitStmt / emitFn / …). The canonical layout is the lowering's layout rules
// transposed to Flash spelling: 4-space indent, the same brace-spacing zig fmt
// uses, mandatory braces. Blank lines between top-level units follow the
// source (collapsed to at most one) rather than being imposed.
//
// The Flash spelling is the lowering mapping read in reverse — the implicit
// `const` pointee that lowering makes explicit is dropped again here:
//
// const NAME = @import("X") <- use X (a bare module import)
// const Y = @import("X") <- use X as Y
// const Y = @import("X.zig") <- use "X" as Y (a sibling file; Flash names
// the stem only — no backend extension)
// comptime { _ = @import(M) } <- link "M" (kept as `link` lines, not
// folded into a comptime block)
// @name(a, …) <- #name(a, …) (the '#' builtin sigil)
// fn f(a: T) R <- fn f(a T) R (type after the name, no
// colon; a missing return is simply omitted)
// []const T / []T <- []T / []mut T (const-pointee default: the
// [*]const T / *const T <- [*]T / *T implicit `const` is dropped)
// x and y / x or y <- x && y / x || y (the logical operators keep
// their Flash spelling)
// const x = e (untyped) <- x := e (the short-declaration canon)
// if (c) { … } <- if c { … } (statement conditions carry
// while (c) { … } <- while c { … } no parentheses)
// for (xs) |x| { … } <- for x in xs { … }
// for (lo..hi) |i| { … } <- for i in lo..hi { … }
//
// A value `if` is the one conditional that keeps its parentheses
// (`if (c) a else b`), matching the surface grammar. Statements carry no
// trailing semicolon. Source blank lines between statements are preserved
// (collapsed to one). Blank lines between top-level units are preserved the
// same way: a source blank before a unit renders as one blank, and a tight
// run of `use` (or of `link`) lines the author wrote stays tight.
//
// Three guarantees back the rewrite, each gated by the test suite: a parse
// error refuses the file untouched (a formatter never destroys code); every
// comment in the input appears exactly once in the output; and formatting
// never changes the emitted Zig — lower(parse(src)) equals
// lower(parse(fmt(src))) byte for byte — so a reformat can never alter a
// program's meaning. The formatter is also idempotent: fmt(fmt(src)) == fmt(src).
const std = @import("std");
const ast = @import("ast.zig");
const token = @import("token.zig");
const parser = @import("parser.zig");
const lower = @import("lower.zig");
const Parser = parser.Parser;
// Re-exported for the integration suite (tests/fmt_examples.zig): the lexer the
// formatter is built on, so a test can tokenize a formatted result to compare
// comment multisets without a second lexer module — which would place
// src/lexer.zig in two module graphs at once (a compile error).
pub const Lexer = @import("lexer.zig").Lexer;
pub const Error = error{OutOfMemory};
// Parse `src` and return its canonical Flash rendering. The parser is created
// here; a parse failure is forwarded to the caller unchanged, in which case
// nothing is rendered (the caller leaves the file untouched). The returned
// text is allocated from `arena`.
pub fn format(arena: std.mem.Allocator, src: []const u8) parser.Error![]const u8 {
    var flash_parser = Parser.init(arena, src);
    const parsed = try flash_parser.parseProgram();
    // The comments are read only after parsing has finished collecting them.
    const collected = flash_parser.comments;
    return render(arena, parsed, collected, src);
}
// Render a parsed program back to Flash text. `comments` are the line comments
// the parser set aside, in source order; they are re-attached while the items
// are walked. `src` is the original buffer: AST slices point into it, which is
// how blank lines and comment positions are recovered. The result is allocated
// from `arena`.
pub fn render(arena: std.mem.Allocator, program: ast.Program, comments: []const token.Token, src: []const u8) Error![]const u8 {
    var out: Printer = .{ .arena = arena, .src = src, .comments = comments };
    const units = program.items;
    // True until something (a comment or an item) has been written; it
    // suppresses a leading blank line at the very top of the output.
    var at_start = true;
    for (units, 0..) |unit, i| {
        // Standalone comments that precede this item (a file header, a comment
        // block above a declaration) are written first, at depth 0.
        const lead = out.anchorOffset(itemLeadAnchor(unit));
        if (lead) |off| {
            const wrote_comment = try out.flushStandalone(off, 0, at_start);
            if (wrote_comment) at_start = false;
        }
        // A blank line in the source before the item is kept as one blank
        // line; items the author wrote tightly stay tight. For a `comptime`
        // block the anchor is its first statement, one line below the head, so
        // the check is moved up to the head line.
        if (!at_start) {
            var blank_anchor = lead;
            if (lead) |first_stmt| {
                if (unit == .comptime_block) blank_anchor = prevLineBreakOffset(out.src, first_stmt);
            }
            if (blank_anchor) |off| {
                if (blankBeforeOffset(out.src, off)) try out.raw("\n");
            }
        }
        at_start = false;
        // The next item's start bounds this item's block-close comment flushes.
        out.boundary = out.nextOffset(units, i + 1);
        switch (unit) {
            .use_decl => |use_decl| try out.emitUseDeclAt(use_decl),
            // Each `link` stays on its own line, as written.
            .link_decl => |link_decl| {
                try out.raw("link \"");
                try out.raw(link_decl.module);
                try out.raw("\"");
            },
            .const_decl => |const_decl| try out.emitConstDecl(const_decl),
            .fn_decl => |fn_decl| try out.emitFn(fn_decl),
            .comptime_block => |body| {
                try out.raw("comptime ");
                try out.emitBlockBody(body, 0);
            },
            .test_decl => |test_decl| {
                try out.raw("test ");
                try out.raw(test_decl.name);
                try out.raw(" ");
                try out.emitBlockBody(test_decl.body, 0);
            },
        }
        // A same-line comment after the item attaches before the line break.
        const tail = out.anchorOffset(itemTailAnchor(unit));
        try out.flushTrailing(tail);
        try out.raw("\n");
    }
    // Whatever comments are still pending go out at depth 0 at end of file.
    _ = try out.flushStandalone(out.src.len, 0, at_start);
    return out.buf.toOwnedSlice(arena);
}
const Printer = struct {
arena: std.mem.Allocator,
src: []const u8,
buf: std.ArrayList(u8) = .empty,
// The source-ordered line comments to reattach while walking the AST, and a
// cursor into them. The walk merges the two streams: at each element it
// flushes the comments that precede it (standalone, on their own lines) and
// appends a same-line one as a trailing comment. Every comment is emitted
// exactly once — anything not placed earlier is flushed at end of file.
comments: []const token.Token,
c_idx: usize = 0,
// The source offset just past the construct currently being emitted — the
// exclusive upper bound for a block-close comment flush, so a block never
// adopts comments that belong to a later sibling. Set by each sequence loop
// (items, statements) to the next element's offset; saved and restored
// around nested blocks.
boundary: usize = 0,
// Append the bytes of `s` to the output buffer verbatim.
fn raw(self: *Printer, s: []const u8) Error!void {
    return self.buf.appendSlice(self.arena, s);
}
// Append formatted text to the output buffer; `fmt` and `args` follow the
// std.fmt conventions of the buffer's own `print`.
fn print(self: *Printer, comptime fmt: []const u8, args: anytype) Error!void {
    return self.buf.print(self.arena, fmt, args);
}
// Write the leading indentation for a line at nesting `depth`: one 4-space
// unit per level. The width matches the canonical layout stated in the file
// header and the `* 4` column arithmetic flushBlockClose uses to decide
// whether a comment sits inside a block. A depth of 0 writes nothing.
fn indent(self: *Printer, depth: usize) Error!void {
    const indent_unit = "    ";
    for (0..depth) |_| try self.raw(indent_unit);
}
// --- comment plumbing ------------------------------------------------
// Translate an AST source slice into its byte offset within `src`. Returns
// null for a missing slice, an empty slice, or a slice whose start address
// does not fall inside the `src` buffer.
fn anchorOffset(self: *Printer, slice: ?[]const u8) ?usize {
    const view = slice orelse return null;
    if (view.len == 0) return null;
    const src_begin = @intFromPtr(self.src.ptr);
    const src_end = src_begin + self.src.len;
    const view_begin = @intFromPtr(view.ptr);
    const inside = view_begin >= src_begin and view_begin < src_end;
    return if (inside) view_begin - src_begin else null;
}
// Offset at which items[idx] begins (its lead anchor). Falls back to the end
// of the source when `idx` is past the last item or the item has no usable
// anchor. Used as the boundary for the preceding item's block-close flush.
fn nextOffset(self: *Printer, items: []const ast.Item, idx: usize) usize {
    if (idx < items.len) {
        if (self.anchorOffset(itemLeadAnchor(items[idx]))) |off| return off;
    }
    return self.src.len;
}
// Write each pending comment that starts before `limit` on its own line at
// `depth`, advancing the comment cursor past it. A blank line that precedes a
// comment in the source is reproduced as a single blank line, except that
// `suppress_leading_blank` drops it for the first comment written by this
// call. Returns true when at least one comment was written.
fn flushStandalone(self: *Printer, limit: usize, depth: usize, suppress_leading_blank: bool) Error!bool {
    const start_idx = self.c_idx;
    for (self.comments[start_idx..]) |pending| {
        if (pending.start >= limit) break;
        // The cursor has not moved yet only for the first comment of this call.
        const is_first = self.c_idx == start_idx;
        if (!(is_first and suppress_leading_blank) and blankBeforeOffset(self.src, pending.start)) {
            try self.raw("\n");
        }
        try self.indent(depth);
        try self.raw(pending.lexeme(self.src));
        try self.raw("\n");
        self.c_idx += 1;
    }
    return self.c_idx != start_idx;
}
// Attach a same-line comment to the line being written. When the next pending
// comment is a trailing comment and no newline separates it from the element
// at `anchor`, it is appended as ` <lexeme>` and consumed. A null anchor, an
// exhausted comment list, or a comment that fails either check leaves the
// output and the cursor untouched.
fn flushTrailing(self: *Printer, anchor: ?usize) Error!void {
    const off = anchor orelse return;
    if (self.c_idx >= self.comments.len) return;
    const next = self.comments[self.c_idx];
    if (!commentIsTrailing(self.src, next.start)) return;
    if (!noNewlineBetween(self.src, off, next.start)) return;
    try self.raw(" ");
    try self.raw(next.lexeme(self.src));
    self.c_idx += 1;
}
// Called just before a block's closing brace; the block's statements sat at
// `inner_depth`. Writes the pending comments that belong inside the block:
// each must start before `boundary` (the next sibling after the block) and
// sit in a column to the right of the block owner's indentation. The first
// comment that fails either test stops the flush, so a later sibling's
// comments and comments aligned with the owner stay outside. The brace's own
// offset is not needed, which matters because the tree does not record it.
fn flushBlockClose(self: *Printer, inner_depth: usize, boundary: usize) Error!void {
    // Column of the block's owner: one level out from the statements, never
    // below column 0.
    const owner_column: usize = (inner_depth -| 1) * 4;
    while (self.c_idx < self.comments.len) : (self.c_idx += 1) {
        const pending = self.comments[self.c_idx];
        const belongs_inside = pending.start < boundary and
            commentColumn(self.src, pending.start) > owner_column;
        if (!belongs_inside) break;
        if (blankBeforeOffset(self.src, pending.start)) try self.raw("\n");
        try self.indent(inner_depth);
        try self.raw(pending.lexeme(self.src));
        try self.raw("\n");
    }
}
// --- items -----------------------------------------------------------
// A top-level function: its doc block, then the signature and body, all at
// depth 0. No line break is written here; the caller (render) flushes a
// trailing comment and ends the line, so a same-line comment on a one-line
// declaration can still attach. The depth-aware work lives in emitFnAt, which
// container methods reuse.
fn emitFn(self: *Printer, f: ast.FnDecl) Error!void {
    const top_level: usize = 0;
    try self.emitDoc(f.doc, top_level);
    try self.emitFnAt(f, top_level);
}
// Write a function starting at the current column; the body's closing brace
// returns to `depth`. Modifiers come out in the fixed order pub, export,
// extern, inline. Parameters are spelled `name type` (an unnamed one as `_`),
// with `comptime` in front when flagged. The return type is written only when
// present, and a function without a body (a prototype) ends after the
// signature with no terminator.
fn emitFnAt(self: *Printer, f: ast.FnDecl, depth: usize) Error!void {
    if (f.is_pub) try self.raw("pub ");
    if (f.is_export) try self.raw("export ");
    if (f.is_extern) try self.raw("extern ");
    if (f.is_inline) try self.raw("inline ");
    try self.raw("fn ");
    try self.raw(f.name);
    try self.raw("(");
    for (f.params, 0..) |param, i| {
        if (i > 0) try self.raw(", ");
        if (param.is_comptime) try self.raw("comptime ");
        const shown_name = param.name orelse "_";
        try self.raw(shown_name);
        try self.raw(" ");
        try self.emitType(param.type);
    }
    try self.raw(")");
    // `callconv(…)` goes between the parameter list and the return type, and
    // only when the declaration carries one; nothing is invented here.
    if (f.call_conv) |conv| {
        try self.raw(" callconv(");
        try self.emitExpr(conv);
        try self.raw(")");
    }
    // The return type stays on the same line as the closing `)`.
    if (f.ret) |ret_type| {
        try self.raw(" ");
        try self.emitType(ret_type);
    }
    const stmts = f.body orelse return;
    try self.raw(" ");
    try self.emitBlockBody(stmts, depth);
}
// A top-level constant: its doc block followed by the declaration, both at
// depth 0. The line is left open; render flushes a trailing comment and
// writes the line break.
fn emitConstDecl(self: *Printer, c: ast.ConstDecl) Error!void {
    const top_level: usize = 0;
    try self.emitDoc(c.doc, top_level);
    try self.emitConstDeclAt(c, top_level);
}
// Write `[pub ](const|var) NAME[ T] = value` at the current column, stopping
// after the value with no line break. `depth` is handed to the value so that
// a multiline string or a nested type definition is laid out one level
// deeper. The keyword form is always kept here; this function never produces
// the `:=` short declaration.
// NOTE(review): no `align(…)` clause is written by this function — confirm
// whether ast.ConstDecl can carry one.
fn emitConstDeclAt(self: *Printer, c: ast.ConstDecl, depth: usize) Error!void {
    if (c.is_pub) try self.raw("pub ");
    const keyword = if (c.is_mut) "var " else "const ";
    try self.raw(keyword);
    try self.raw(c.name);
    if (c.type) |declared| {
        try self.raw(" ");
        try self.emitType(declared);
    }
    switch (c.value) {
        // A multiline string supplies its own `=` and line layout.
        .multiline_str => |lines| {
            try self.raw(" ");
            try self.emitMultilineRhs(lines, depth);
        },
        else => {
            try self.raw(" = ");
            try self.emitValue(c.value, depth);
        },
    }
}
// Write one import as `[pub ]use TARGET[ as ALIAS]`. A file import puts the
// module name in double quotes (`use "syscalls" as sys`); any other import
// writes it bare (`use flibc`). Used both at the top level and for imports
// inside a container body.
fn emitUseDeclAt(self: *Printer, u: ast.UseDecl) Error!void {
    if (u.is_pub) try self.raw("pub ");
    try self.raw("use ");
    if (u.is_file) {
        try self.print("\"{s}\"", .{u.module});
    } else {
        try self.raw(u.module);
    }
    const alias = u.alias orelse return;
    try self.raw(" as ");
    try self.raw(alias);
}
// Lay out a multiline string as the right-hand side of an assignment. The
// caller has already written the left-hand side up to, but not including, the
// `=`. This writes `=` and a line break, then each entry of `lines` on its own
// line at `depth + 1`, prefixed with `\\`. Lines are separated by, not
// terminated with, a line break, so the output ends on the last `\\` line and
// the caller closes the statement. `depth` is the statement's own indent.
fn emitMultilineRhs(self: *Printer, lines: []const []const u8, depth: usize) Error!void {
    try self.raw("=\n");
    var separator: []const u8 = "";
    for (lines) |content| {
        try self.raw(separator);
        try self.indent(depth + 1);
        try self.raw("\\\\");
        try self.raw(content);
        separator = "\n";
    }
}
// Write the value of a binding or constant. A struct, enum, or union
// definition goes to emitTypeDef, which lays it out over several lines and
// closes its brace at `depth`; anything else goes to emitExprAt.
fn emitValue(self: *Printer, value: ast.Expr, depth: usize) Error!void {
    const is_type_def = value == .struct_def or value == .enum_def or value == .union_def;
    if (is_type_def) return self.emitTypeDef(value, depth);
    return self.emitExprAt(value, depth);
}
// Lay out a `struct { … }` / `enum { … }` / `union(…) { … }` definition.
// Members (fields or variants) sit one per line at `depth + 1`, each with a
// trailing comma; associated declarations follow; the closing brace returns to
// `depth`. A field is spelled `name type` (no colon), like a parameter. `x`
// must be one of the three definition kinds; anything else is unreachable.
//
// Member defaults and enum values are emitted at the member's depth rather
// than depth 0, so a default that spans lines (a `switch`, a labeled block)
// is laid out relative to its member. Single-line values are unaffected,
// because they ignore the depth.
fn emitTypeDef(self: *Printer, x: ast.Expr, depth: usize) Error!void {
    const member_depth = depth + 1;
    // The offset just past the whole container. Nothing below changes the
    // boundary before the members are done, so it is read once up front.
    const sb = self.boundary;
    switch (x) {
        .struct_def => |sd| {
            try self.raw("struct {\n");
            for (sd.fields, 0..) |f, idx| {
                try self.typeDefMemberOpen(f.doc, f.name, member_depth, idx == 0);
                try self.raw(" ");
                try self.emitType(f.type);
                if (f.default) |d| {
                    try self.raw(" = ");
                    try self.emitExprAt(d, member_depth);
                }
                try self.typeDefMemberClose(f.name);
            }
            try self.typeDefFinish(sd.decls, sd.fields.len != 0, depth, sb);
        },
        .enum_def => |ed| {
            try self.raw("enum");
            if (ed.tag_type) |t| {
                try self.raw("(");
                try self.raw(t);
                try self.raw(")");
            }
            try self.raw(" {\n");
            for (ed.variants, 0..) |v, idx| {
                try self.typeDefMemberOpen(v.doc, v.name, member_depth, idx == 0);
                if (v.value) |val| {
                    try self.raw(" = ");
                    try self.emitExprAt(val.*, member_depth);
                }
                try self.typeDefMemberClose(v.name);
            }
            try self.typeDefFinish(ed.decls, ed.variants.len != 0, depth, sb);
        },
        .union_def => |ud| {
            try self.raw("union");
            if (ud.tag) |t| {
                try self.raw("(");
                try self.raw(t);
                try self.raw(")");
            }
            try self.raw(" {\n");
            for (ud.variants, 0..) |v, idx| {
                try self.typeDefMemberOpen(v.doc, v.name, member_depth, idx == 0);
                // A payload type renders `name type` (no colon); a bare name
                // is a variant without a payload.
                if (v.payload) |ty| {
                    try self.raw(" ");
                    try self.emitType(ty);
                }
                try self.typeDefMemberClose(v.name);
            }
            try self.typeDefFinish(ud.decls, ud.variants.len != 0, depth, sb);
        },
        else => unreachable,
    }
}
// Start one container member line: flush the standalone comments that precede
// the member (anchored at its first doc line when it has docs, else at its
// name), write its doc block, then the indentation and the member name.
// `is_first` suppresses a leading blank line before the first member's comments.
fn typeDefMemberOpen(self: *Printer, doc: []const []const u8, name: []const u8, member_depth: usize, is_first: bool) Error!void {
    const lead = self.anchorOffset(if (doc.len > 0) doc[0] else name);
    if (lead) |off| _ = try self.flushStandalone(off, member_depth, is_first);
    try self.emitDoc(doc, member_depth);
    try self.indent(member_depth);
    try self.raw(name);
}
// Finish one container member line: the trailing comma, a same-line comment
// anchored at the member's name if one is pending, and the line break.
fn typeDefMemberClose(self: *Printer, name: []const u8) Error!void {
    try self.raw(",");
    try self.flushTrailing(self.anchorOffset(name));
    try self.raw("\n");
}
// Finish a container: its associated declarations, the comments that belong
// inside before the closing brace (bounded by `sb`), and the brace at `depth`.
fn typeDefFinish(self: *Printer, decls: []const ast.ContainerDecl, has_members: bool, depth: usize, sb: usize) Error!void {
    try self.emitContainerDecls(decls, has_members, depth, sb);
    try self.flushBlockClose(depth + 1, sb);
    try self.indent(depth);
    try self.raw("}");
}
// Write a container's associated declarations (methods, constants, imports)
// after its fields or variants, one level inside the container. Every
// declaration is preceded by a blank line, except the very first one in a
// container that has no members. `sb` is the offset just past the whole
// container; it is the boundary for the last declaration and is put back as
// the boundary once all declarations are written.
fn emitContainerDecls(self: *Printer, decls: []const ast.ContainerDecl, has_members: bool, depth: usize, sb: usize) Error!void {
    const inner = depth + 1;
    for (decls, 0..) |decl, i| {
        const needs_gap = has_members or i != 0;
        if (needs_gap) try self.raw("\n");
        if (self.anchorOffset(declLeadAnchor(decl))) |off| {
            _ = try self.flushStandalone(off, inner, true);
        }
        // The following declaration (or the container end) bounds this
        // declaration's block-close flushes, so a method body never takes a
        // comment that belongs to a later declaration.
        var next_bound = sb;
        if (i + 1 < decls.len) {
            if (self.anchorOffset(declLeadAnchor(decls[i + 1]))) |off| next_bound = off;
        }
        self.boundary = next_bound;
        switch (decl) {
            .method => |method| {
                try self.emitDoc(method.doc, inner);
                try self.indent(inner);
                try self.emitFnAt(method, inner);
            },
            .constant => |constant| {
                try self.emitDoc(constant.doc, inner);
                try self.indent(inner);
                try self.emitConstDeclAt(constant, inner);
            },
            .use_import => |import| {
                try self.indent(inner);
                try self.emitUseDeclAt(import);
            },
        }
        const tail = self.anchorOffset(declTailAnchor(decl));
        try self.flushTrailing(tail);
        try self.raw("\n");
    }
    self.boundary = sb;
}
// --- statements ------------------------------------------------------
// Write a brace-delimited block starting at the current column. With no
// statements the block is just `{}`. Otherwise `{` ends the current line, the
// statements follow one per line at `depth + 1`, and `}` is written back at
// `depth` with no line break after it.
fn emitBlockBody(self: *Printer, stmts: []const ast.Stmt, depth: usize) Error!void {
    if (stmts.len == 0) return self.raw("{}");
    try self.raw("{\n");
    try self.emitBlock(stmts, depth + 1);
    try self.indent(depth);
    try self.raw("}");
}
// Write the statements of a block, one per line at `depth`. Before each
// statement the standalone comments that precede it are flushed; a blank line
// before a statement in the source is kept as one blank line, but never ahead
// of the first line written in the block. After each statement a same-line
// comment is attached. Comments left between the last statement and the
// closing brace are flushed at the end if they belong inside the block.
fn emitBlock(self: *Printer, stmts: []const ast.Stmt, depth: usize) Error!void {
    // The offset just past this whole block, as set by the caller.
    const outer = self.boundary;
    var at_top = true;
    for (stmts, 0..) |stmt, i| {
        const anchor = self.anchorOffset(stmtAnchor(stmt));
        if (anchor) |off| {
            if (try self.flushStandalone(off, depth, at_top)) at_top = false;
            if (!at_top and blankBeforeOffset(self.src, off)) try self.raw("\n");
        }
        at_top = false;
        // The next statement (or the block's own boundary, for the last one)
        // bounds the block-close flushes of blocks nested in this statement.
        var next_bound = outer;
        if (i + 1 < stmts.len) {
            if (self.anchorOffset(stmtAnchor(stmts[i + 1]))) |off| next_bound = off;
        }
        self.boundary = next_bound;
        try self.indent(depth);
        try self.emitStmt(stmt, depth);
        try self.flushTrailing(anchor);
        try self.raw("\n");
    }
    self.boundary = outer;
    try self.flushBlockClose(depth, outer);
}
// Write a doc-comment block: for each entry of `doc`, one line at `depth`
// made of `///` followed by the entry's text as stored. An empty `doc` writes
// nothing.
fn emitDoc(self: *Printer, doc: []const []const u8, depth: usize) Error!void {
    for (doc) |content| {
        try self.indent(depth);
        try self.print("///{s}\n", .{content});
    }
}
// Write one statement at the current column, with no indentation before it
// and no line break after it (the enclosing block supplies both). `depth` is
// the statement's own nesting level; it is passed on so that nested blocks
// and multi-line values are laid out relative to it.
fn emitStmt(self: *Printer, s: ast.Stmt, depth: usize) Error!void {
    switch (s) {
        .discard => |value| switch (value) {
            // A multiline string supplies its own `=` and line layout.
            .multiline_str => |lines| {
                try self.raw("_ ");
                try self.emitMultilineRhs(lines, depth);
            },
            else => {
                try self.raw("_ = ");
                try self.emitExprAt(value, depth);
            },
        },
        .bind => |b| {
            // An immutable binding with no type, no `align`, and no `comptime`
            // is written in the short form `name := value`. Every other
            // binding keeps its keyword form.
            const is_short = !b.is_mut and b.type == null and b.align_expr == null and !b.is_comptime;
            if (!is_short) {
                if (b.is_comptime) try self.raw("comptime ");
                try self.raw(if (b.is_mut) "var " else "const ");
            }
            try self.raw(b.name);
            // Both are null in the short form, so these write nothing there.
            if (b.type) |declared| {
                try self.raw(" ");
                try self.emitType(declared);
            }
            if (b.align_expr) |alignment| {
                try self.raw(" align(");
                try self.emitExpr(alignment);
                try self.raw(")");
            }
            switch (b.value) {
                // emitMultilineRhs writes the `=` itself, so the short form
                // contributes only the `:` in front of it.
                .multiline_str => |lines| {
                    try self.raw(if (is_short) " :" else " ");
                    try self.emitMultilineRhs(lines, depth);
                },
                else => {
                    try self.raw(if (is_short) " := " else " = ");
                    try self.emitValue(b.value, depth);
                },
            }
        },
        .assign => |a| {
            try self.emitExprAt(a.target, depth);
            // The operator text is written as stored.
            try self.print(" {s} ", .{a.op});
            try self.emitExprAt(a.value, depth);
        },
        // An immutable destructure is written `a, b := e`; a mutable one is
        // written `var a, b = e`. A missing name is written `_`.
        .destructure => |d| {
            if (d.is_mut) try self.raw("var ");
            for (d.names, 0..) |name, i| {
                if (i > 0) try self.raw(", ");
                try self.raw(name orelse "_");
            }
            const operator = if (d.is_mut) " = " else " := ";
            try self.raw(operator);
            try self.emitValue(d.value, depth);
        },
        .destructure_assign => |da| {
            for (da.targets, 0..) |target, i| {
                if (i > 0) try self.raw(", ");
                try self.emitExprAt(target, depth);
            }
            try self.raw(" = ");
            try self.emitExprAt(da.value, depth);
        },
        .if_stmt => |if_stmt| try self.emitIf(if_stmt, depth),
        .defer_stmt => |deferred| {
            try self.raw("defer ");
            try self.emitStmt(deferred.*, depth);
        },
        .errdefer_stmt => |deferred| {
            try self.raw("errdefer ");
            try self.emitStmt(deferred.*, depth);
        },
        .defer_block => |body| {
            try self.raw("defer ");
            try self.emitBlockBody(body, depth);
        },
        .errdefer_block => |body| {
            try self.raw("errdefer ");
            try self.emitBlockBody(body, depth);
        },
        .while_stmt => |loop| {
            if (loop.is_inline) try self.raw("inline ");
            try self.raw("while ");
            try self.emitExprAt(loop.cond, depth);
            if (loop.capture) |cap| try self.print(" |{s}|", .{cap});
            try self.raw(" ");
            try self.emitLoopBody(loop.body, loop.else_body, loop.else_capture, depth);
        },
        .for_stmt => |loop| {
            if (loop.is_inline) try self.raw("inline ");
            try self.raw("for ");
            for (loop.captures, 0..) |cap, i| {
                if (i > 0) try self.raw(", ");
                try self.raw(cap);
            }
            try self.raw(" in ");
            try self.emitExprAt(loop.iter, depth);
            // A range loop carries its upper bound separately.
            if (loop.range_hi) |upper| {
                try self.raw("..");
                try self.emitExprAt(upper, depth);
            }
            try self.raw(" ");
            try self.emitLoopBody(loop.body, loop.else_body, null, depth);
        },
        .expr => |value| try self.emitExprAt(value, depth),
    }
}
// Write an `if` statement: `if cond[ |cap|] { … }`, optionally followed by an
// else arm. The condition carries no parentheses (the statement form).
//
// The else arm is written as an `else if … { … }` chain when its body is
// exactly one nested `if` AND the else arm has no capture. With a capture the
// arm is always written in full as ` else |err| { … }`: folding it into a
// chain would drop the `|err|` binding and change the program.
//
// While the then-body is written, the boundary is narrowed to the start of
// the else arm so the then-body's block-close flush does not take comments
// that belong to the else arm; it is put back before the else arm is written.
fn emitIf(self: *Printer, iff: ast.If, depth: usize) Error!void {
    try self.raw("if ");
    try self.emitExprAt(iff.cond, depth);
    if (iff.capture) |cap| {
        try self.raw(" |");
        try self.raw(cap);
        try self.raw("|");
    }
    try self.raw(" ");
    const eb = iff.else_body orelse return self.emitBlockBody(iff.body, depth);
    const after_if = self.boundary;
    self.boundary = self.anchorOffset(elseAnchor(eb)) orelse after_if;
    try self.emitBlockBody(iff.body, depth);
    self.boundary = after_if;
    try self.raw(" else ");
    // Only a capture-free else arm may collapse into `else if`.
    const chains = iff.else_capture == null and eb.len == 1 and eb[0] == .if_stmt;
    if (chains) return self.emitIf(eb[0].if_stmt, depth);
    if (iff.else_capture) |cap| {
        try self.raw("|");
        try self.raw(cap);
        try self.raw("| ");
    }
    try self.emitBlockBody(eb, depth);
}
// Write a loop's body block and, when present, its `else` arm. While the body
// is written the boundary is narrowed to the start of the else arm, so the
// body's block-close flush does not take the else arm's comments; the
// boundary is put back before the else arm is written. A capture on the else
// arm is written as ` else |cap| { … }`.
fn emitLoopBody(self: *Printer, body: []ast.Stmt, else_body: ?[]ast.Stmt, else_capture: ?[]const u8, depth: usize) Error!void {
    const else_stmts = else_body orelse return self.emitBlockBody(body, depth);
    const after_loop = self.boundary;
    self.boundary = self.anchorOffset(elseAnchor(else_stmts)) orelse after_loop;
    try self.emitBlockBody(body, depth);
    self.boundary = after_loop;
    try self.raw(" else ");
    if (else_capture) |cap| try self.print("|{s}| ", .{cap});
    try self.emitBlockBody(else_stmts, depth);
}
// --- expressions -----------------------------------------------------
// Write an expression with depth 0. Intended for positions where the
// expression is expected to stay on one line, so the depth does not matter.
fn emitExpr(self: *Printer, x: ast.Expr) Error!void {
    return self.emitExprAt(x, 0);
}
// Write an expression whose enclosing line sits at indentation `depth`. Most
// forms stay on one line and simply hand `depth` on to their operands. The
// forms that span lines — a block expression, a `switch`, and a type
// definition — lay their contents out at `depth + 1` and close at `depth`.
fn emitExprAt(self: *Printer, x: ast.Expr, depth: usize) Error!void {
    switch (x) {
        // Leaf forms are stored as text and written as stored.
        .int, .float, .string, .char, .ident, .value_word => |text| try self.raw(text),
        .multiline_str => |lines| {
            // A multiline string outside the routed value positions (those go
            // through emitMultilineRhs): it starts on a fresh line and each
            // `\\` line is written without indentation.
            try self.raw("\n");
            for (lines) |content| {
                try self.raw("\\\\");
                try self.raw(content);
                try self.raw("\n");
            }
        },
        .member => |access| {
            try self.emitExprAt(access.base.*, depth);
            try self.print(".{s}", .{access.field});
        },
        .deref => |target| {
            try self.emitExprAt(target.*, depth);
            try self.raw(".*");
        },
        .optional_unwrap => |target| {
            try self.emitExprAt(target.*, depth);
            try self.raw(".?");
        },
        .call => |call| {
            try self.emitExprAt(call.callee.*, depth);
            try self.emitArgs(call.args, depth);
        },
        .index => |subscript| {
            try self.emitExprAt(subscript.base.*, depth);
            try self.raw("[");
            try self.emitExprAt(subscript.index.*, depth);
            try self.raw("]");
        },
        .slice => |range| {
            try self.emitExprAt(range.base.*, depth);
            try self.raw("[");
            try self.emitExprAt(range.lo.*, depth);
            // The `..` is surrounded by spaces when either bound asks for it.
            const spaced = sliceBoundSpaces(range.lo.*) or
                (if (range.hi) |upper| sliceBoundSpaces(upper.*) else false);
            try self.raw(if (spaced) " .." else "..");
            if (range.hi) |upper| {
                if (spaced) try self.raw(" ");
                try self.emitExprAt(upper.*, depth);
            }
            if (range.sentinel) |sentinel| {
                try self.raw(" :");
                try self.emitExprAt(sentinel.*, depth);
            }
            try self.raw("]");
        },
        .builtin_call => |builtin| {
            // The tree stores the bare intrinsic name; Flash writes it with a
            // leading `#`.
            try self.raw("#");
            try self.raw(builtin.name);
            try self.emitArgs(builtin.args, depth);
        },
        .unary => |unary| {
            try self.raw(unary.op);
            try self.emitExprAt(unary.operand.*, depth);
        },
        .binary => |binary| {
            try self.emitExprAt(binary.lhs.*, depth);
            // The operator text is written as stored.
            try self.print(" {s} ", .{binary.op});
            try self.emitExprAt(binary.rhs.*, depth);
        },
        .struct_lit => |fields| {
            // Empty literals and a single unnamed element are written tight
            // (`.{}`, `.{x}`); everything else gets inner spaces.
            const tight = fields.len == 0 or (fields.len == 1 and fields[0].name == null);
            try self.raw(if (tight) ".{" else ".{ ");
            for (fields, 0..) |field, i| {
                if (i > 0) try self.raw(", ");
                if (field.name) |name| try self.print(".{s} = ", .{name});
                try self.emitExprAt(field.value, depth);
            }
            try self.raw(if (tight) "}" else " }");
        },
        .typed_lit => |literal| {
            try self.emitExprAt(literal.type.*, depth);
            // Same spacing rule as the anonymous literal.
            const tight = literal.fields.len == 0 or
                (literal.fields.len == 1 and literal.fields[0].name == null);
            try self.raw(if (tight) "{" else "{ ");
            for (literal.fields, 0..) |field, i| {
                if (i > 0) try self.raw(", ");
                if (field.name) |name| try self.print(".{s} = ", .{name});
                try self.emitExprAt(field.value, depth);
            }
            try self.raw(if (tight) "}" else " }");
        },
        .type_lit => |ty| try self.emitType(ty.*),
        .enum_lit => |tag| try self.print(".{s}", .{tag}),
        .error_lit => |name| try self.print("error.{s}", .{name}),
        .error_set => |names| {
            // Inner spaces only when the set lists more than one name.
            const roomy = names.len > 1;
            try self.raw(if (roomy) "error{ " else "error{");
            for (names, 0..) |name, i| {
                if (i > 0) try self.raw(", ");
                try self.raw(name);
            }
            try self.raw(if (roomy) " }" else "}");
        },
        .struct_def, .enum_def, .union_def => try self.emitTypeDef(x, depth),
        .group => |inner| {
            try self.raw("(");
            try self.emitExprAt(inner.*, depth);
            try self.raw(")");
        },
        // The value form of `if` is written with a parenthesised condition:
        // `if (cond) a else b`.
        .if_expr => |cond_expr| {
            try self.raw("if (");
            try self.emitExprAt(cond_expr.cond.*, depth);
            try self.raw(") ");
            try self.emitExprAt(cond_expr.then.*, depth);
            try self.raw(" else ");
            try self.emitExprAt(cond_expr.else_.*, depth);
        },
        // `switch subject { … }`: no parentheses around the subject, one prong
        // per line at depth + 1, each ending in a comma.
        .switch_expr => |sw| {
            const prong_depth = depth + 1;
            try self.raw("switch ");
            try self.emitExprAt(sw.subject.*, depth);
            try self.raw(" {\n");
            // The offset just past the whole switch.
            const switch_end = self.boundary;
            for (sw.prongs, 0..) |prong, prong_idx| {
                // Comments ahead of a prong are anchored at its first pattern;
                // a prong without patterns has no anchor.
                var lead: ?usize = null;
                if (prong.patterns.len > 0) {
                    lead = self.anchorOffset(exprAnchor(prong.patterns[0].lo));
                }
                if (lead) |off| {
                    _ = try self.flushStandalone(off, prong_depth, prong_idx == 0);
                }
                // The next prong (or the end of the switch, for the last one)
                // bounds the block-close flushes inside this prong's body.
                var next_bound = switch_end;
                if (prong_idx + 1 < sw.prongs.len) {
                    if (self.anchorOffset(prongAnchor(sw.prongs[prong_idx + 1]))) |off| next_bound = off;
                }
                self.boundary = next_bound;
                try self.indent(prong_depth);
                if (prong.is_else) {
                    try self.raw("else");
                } else {
                    for (prong.patterns, 0..) |pattern, i| {
                        if (i > 0) try self.raw(", ");
                        try self.emitExprAt(pattern.lo, prong_depth);
                        if (pattern.hi) |upper| {
                            try self.raw("...");
                            try self.emitExprAt(upper, prong_depth);
                        }
                    }
                }
                try self.raw(" => ");
                if (prong.capture) |cap| try self.print("|{s}| ", .{cap});
                try self.emitExprAt(prong.body, prong_depth);
                try self.raw(",");
                try self.flushTrailing(lead);
                try self.raw("\n");
            }
            self.boundary = switch_end;
            try self.flushBlockClose(prong_depth, switch_end);
            try self.indent(depth);
            try self.raw("}");
        },
        .block_expr => |block| {
            if (block.label) |label| try self.print("{s}: ", .{label});
            try self.emitBlockBody(block.body, depth);
        },
        .try_expr => |operand| {
            try self.raw("try ");
            try self.emitExprAt(operand.*, depth);
        },
        .catch_expr => |catch_expr| {
            try self.emitExprAt(catch_expr.lhs.*, depth);
            try self.raw(" catch ");
            if (catch_expr.capture) |cap| try self.print("|{s}| ", .{cap});
            try self.emitExprAt(catch_expr.handler.*, depth);
        },
        .asm_expr => |assembly| try self.emitAsm(assembly, depth),
        .brk => |brk| {
            try self.raw("break");
            if (brk.label) |label| try self.print(" :{s}", .{label});
            if (brk.value) |value| {
                try self.raw(" ");
                try self.emitExprAt(value.*, depth);
            }
        },
        .cont => try self.raw("continue"),
        .ret => |returned| {
            try self.raw("return");
            // The value list is written as stored: one value as `return v`,
            // several as `return a, b`.
            const values = returned orelse return;
            try self.raw(" ");
            for (values, 0..) |value, i| {
                if (i > 0) try self.raw(", ");
                try self.emitExprAt(value, depth);
            }
        },
    }
}
// Emits a parenthesized argument list: `(`, the arguments separated by `, `,
// then `)`. Each argument is rendered through emitExprAt at the caller's
// `depth`; an empty list renders as `()`.
fn emitArgs(self: *Printer, args: []const ast.Expr, depth: usize) Error!void {
    try self.raw("(");
    if (args.len > 0) {
        // The first argument carries no separator; every later one is led by it.
        try self.emitExprAt(args[0], depth);
        for (args[1..]) |arg| {
            try self.raw(", ");
            try self.emitExprAt(arg, depth);
        }
    }
    try self.raw(")");
}
// --- types -----------------------------------------------------------
// Writes a type in its Flash spelling — the lowering's mapping run backwards.
// Pointees are const by default on the surface, so the pointer families carry
// no `const` here (`[]T`, `*T`, `[*]T`); a mutable pointee is spelled with
// `mut`. `argv` / `cstr` pass through as ordinary names: expanding the builtin
// aliases is the lowering's job, not the surface's.
//
// Every arm writes a fixed prefix and then recurses into the element type;
// sentinel and length expressions go through emitExpr.
fn emitType(self: *Printer, t: ast.TypeRef) Error!void {
    switch (t) {
        // A named type is written verbatim.
        .name => |ident| try self.raw(ident),

        // --- prefix wrappers ---
        .optional => |child| {
            try self.raw("?");
            try self.emitType(child.*);
        },
        .errunion => |eu| {
            // An inferred error set writes nothing before the `!`.
            if (eu.set) |err_set| try self.emitType(err_set.*);
            try self.raw("!");
            try self.emitType(eu.payload.*);
        },

        // --- single-item pointers ---
        .ptr => |pointee| {
            try self.raw("*");
            try self.emitType(pointee.*);
        },
        .ptr_mut => |pointee| {
            try self.raw("*mut ");
            try self.emitType(pointee.*);
        },
        .ptr_volatile => |pointee| {
            try self.raw("*volatile ");
            try self.emitType(pointee.*);
        },
        .ptr_mut_volatile => |pointee| {
            try self.raw("*mut volatile ");
            try self.emitType(pointee.*);
        },

        // --- many-item pointers ---
        .many_ptr => |pointee| {
            try self.raw("[*]");
            try self.emitType(pointee.*);
        },
        .many_ptr_mut => |pointee| {
            try self.raw("[*]mut ");
            try self.emitType(pointee.*);
        },
        .many_ptr_volatile => |pointee| {
            try self.raw("[*]volatile ");
            try self.emitType(pointee.*);
        },
        .many_ptr_mut_volatile => |pointee| {
            try self.raw("[*]mut volatile ");
            try self.emitType(pointee.*);
        },
        .many_ptr_sentinel => |with_sentinel| {
            try self.raw("[*:");
            try self.emitExpr(with_sentinel.sentinel.*);
            try self.raw("]");
            try self.emitType(with_sentinel.elem.*);
        },
        .many_ptr_sentinel_mut => |with_sentinel| {
            try self.raw("[*:");
            try self.emitExpr(with_sentinel.sentinel.*);
            try self.raw("]mut ");
            try self.emitType(with_sentinel.elem.*);
        },

        // --- slices ---
        .slice => |elem| {
            try self.raw("[]");
            try self.emitType(elem.*);
        },
        .slice_mut => |elem| {
            try self.raw("[]mut ");
            try self.emitType(elem.*);
        },
        .slice_sentinel => |with_sentinel| {
            try self.raw("[:");
            try self.emitExpr(with_sentinel.sentinel.*);
            try self.raw("]");
            try self.emitType(with_sentinel.elem.*);
        },
        .slice_sentinel_mut => |with_sentinel| {
            try self.raw("[:");
            try self.emitExpr(with_sentinel.sentinel.*);
            try self.raw("]mut ");
            try self.emitType(with_sentinel.elem.*);
        },

        // --- arrays ---
        .array => |fixed| {
            try self.raw("[");
            try self.emitExpr(fixed.len.*);
            try self.raw("]");
            try self.emitType(fixed.elem.*);
        },
        .array_sentinel => |fixed| {
            try self.raw("[");
            try self.emitExpr(fixed.len.*);
            try self.raw(":");
            try self.emitExpr(fixed.sentinel.*);
            try self.raw("]");
            try self.emitType(fixed.elem.*);
        },
        .array_inferred => |elem| {
            try self.raw("[_]");
            try self.emitType(elem.*);
        },
        .array_inferred_sentinel => |with_sentinel| {
            try self.raw("[_:");
            try self.emitExpr(with_sentinel.sentinel.*);
            try self.raw("]");
            try self.emitType(with_sentinel.elem.*);
        },

        // --- composite forms ---
        .fn_type => |sig| {
            // `fn(P, …) R` — the parameter list sits tight against `fn`, and a
            // missing return type is simply not written.
            try self.raw("fn(");
            if (sig.params.len > 0) {
                try self.emitType(sig.params[0]);
                for (sig.params[1..]) |param| {
                    try self.raw(", ");
                    try self.emitType(param);
                }
            }
            try self.raw(")");
            if (sig.ret) |ret_type| {
                try self.raw(" ");
                try self.emitType(ret_type.*);
            }
        },
        .generic => |inst| {
            // `Name(arg, …)` — the arguments are expressions, not types.
            try self.raw(inst.name);
            try self.raw("(");
            if (inst.args.len > 0) {
                try self.emitExpr(inst.args[0]);
                for (inst.args[1..]) |arg| {
                    try self.raw(", ");
                    try self.emitExpr(arg);
                }
            }
            try self.raw(")");
        },
        .tuple => |members| {
            // `(A, B)` in canonical form: `, ` between members and no trailing
            // comma (a trailing comma tolerated in the source is dropped).
            try self.raw("(");
            if (members.len > 0) {
                try self.emitType(members[0]);
                for (members[1..]) |member| {
                    try self.raw(", ");
                    try self.emitType(member);
                }
            }
            try self.raw(")");
        },
    }
}
// `asm [volatile] (…)` — inline assembly, laid out as the lowering lays it out.
// The template and constraint strings belong to a foreign sublanguage and pass
// through untouched; only operand types and value expressions are respelled
// (emitType / emitExpr). An output operand keeps its `-> T` arrow, which the
// surface retains in this position.
//
// Two layouts:
//   * compact — a single-line template with no operands stays on one line,
//     with any clobbers after ` ::: `;
//   * sectioned — otherwise the template is followed by one `:` section per
//     line at depth + 1. A section is written whenever it or any later section
//     is present, so an empty earlier section still gets its `:` placeholder.
fn emitAsm(self: *Printer, a: ast.AsmExpr, depth: usize) Error!void {
    try self.raw("asm ");
    if (a.is_volatile) try self.raw("volatile ");
    try self.raw("(");

    const template_is_multiline = a.template.* == .multiline_str;
    const has_operands = a.outputs.len > 0 or a.inputs.len > 0;

    // Compact layout: everything fits on the opening line.
    if (!template_is_multiline and !has_operands) {
        try self.emitExprAt(a.template.*, depth);
        if (a.clobbers) |clobbers| {
            try self.raw(" ::: ");
            try self.emitExprAt(clobbers.*, depth);
        }
        return self.raw(")");
    }

    // Sectioned layout: the template first, ending its own line.
    switch (a.template.*) {
        .multiline_str => |lines| {
            // Each template line is re-emitted behind the multiline-string
            // prefix, one level deeper than the `asm` itself.
            try self.raw("\n");
            for (lines) |line| {
                try self.indent(depth + 1);
                try self.raw("\\\\");
                try self.raw(line);
                try self.raw("\n");
            }
        },
        else => {
            try self.emitExprAt(a.template.*, depth);
            try self.raw("\n");
        },
    }

    // A section must be written if it, or any section after it, is present.
    const has_clobbers = a.clobbers != null;
    const show_inputs = has_clobbers or a.inputs.len > 0;
    const show_outputs = show_inputs or a.outputs.len > 0;

    if (show_outputs) {
        try self.indent(depth + 1);
        try self.raw(":");
        try self.emitAsmOperandList(a.outputs, depth);
    }
    if (show_inputs) {
        try self.indent(depth + 1);
        try self.raw(":");
        try self.emitAsmOperandList(a.inputs, depth);
    }
    if (a.clobbers) |clobbers| {
        // The clobber section closes the parenthesis on its own line.
        try self.indent(depth + 1);
        try self.raw(": ");
        try self.emitExprAt(clobbers.*, depth);
        return self.raw(")");
    }

    // No clobbers: the closing parenthesis stands alone at the outer depth.
    try self.indent(depth);
    try self.raw(")");
}
// Emits the operands of one asm section, continuing the line on which the
// caller has already written the section's `:`. An empty section just ends
// that line. Otherwise each operand is written via emitAsmOperand and closed
// with `,` and a newline: the first operand follows the `:` after a separator,
// and every later operand starts a fresh line indented to depth + 1.
fn emitAsmOperandList(self: *Printer, ops: []const ast.AsmOperand, depth: usize) Error!void {
if (ops.len == 0) {
// Nothing to list: terminate the `:` line and stop.
try self.raw("\n");
return;
}
for (ops, 0..) |op, idx| {
if (idx == 0) {
// First operand shares the line with the section's `:`.
try self.raw(" ");
} else {
// Later operands each open their own line one level deeper.
try self.indent(depth + 1);
try self.raw(" ");
}
try self.emitAsmOperand(op, depth);
try self.raw(",\n");
}
}
// Emits one asm operand as `[name] constraint (body)`. The name and constraint
// are written exactly as stored; the body is either an output type behind the
// `-> ` arrow (through emitType) or a value expression (through emitExprAt).
fn emitAsmOperand(self: *Printer, op: ast.AsmOperand, depth: usize) Error!void {
    // The fixed head, in order: bracketed name, constraint, opening paren.
    for ([_][]const u8{ "[", op.name, "] ", op.constraint, " (" }) |piece| {
        try self.raw(piece);
    }
    switch (op.body) {
        .expr => |value| try self.emitExprAt(value, depth),
        .ret_type => |result_type| {
            try self.raw("-> ");
            try self.emitType(result_type);
        },
    }
    try self.raw(")");
}
};
// Whether a slice bound puts spaces around the `..`, mirroring the lowering:
// only a binary operation or a `catch` bound does; any other expression keeps
// the range tight.
fn sliceBoundSpaces(x: ast.Expr) bool {
    return x == .binary or x == .catch_expr;
}
// Whether the line directly above the one containing byte `offset` is blank,
// i.e. holds nothing but spaces, tabs, or carriage returns — the author's
// paragraph break before the statement or comment at `offset`. False when
// `offset` sits on the first line, which has no line above it.
fn blankBeforeOffset(src: []const u8, offset: usize) bool {
    // Walk left to the first byte of offset's own line.
    var line_start = offset;
    while (line_start > 0) : (line_start -= 1) {
        if (src[line_start - 1] == '\n') break;
    }
    if (line_start == 0) return false;

    // `cursor` starts on the '\n' that ends the previous line and walks that
    // line right-to-left: reaching its start (another '\n' or the top of the
    // file) means it was whitespace only.
    var cursor = line_start - 1;
    while (cursor > 0) {
        switch (src[cursor - 1]) {
            '\n' => return true,
            ' ', '\t', '\r' => cursor -= 1,
            else => return false,
        }
    }
    return true;
}
// The offset of the '\n' that ends the line above `offset`'s line, or null when
// `offset` is on the first line. It lets a blank-line check step up one line
// for a construct whose first stored slice sits a line below its head (a
// `comptime { … }` block, anchored at its first statement).
fn prevLineBreakOffset(src: []const u8, offset: usize) ?usize {
    // The nearest '\n' strictly before `offset` is the one being looked for.
    var pos = offset;
    while (pos > 0) {
        pos -= 1;
        if (src[pos] == '\n') return pos;
    }
    return null;
}
// The source column of byte `offset`: how many bytes lie between the start of
// its line and `offset` itself. For a comment this is its indentation depth.
fn commentColumn(src: []const u8, offset: usize) usize {
    // Grow the width leftwards until a '\n' or the start of the source is hit.
    var width: usize = 0;
    while (width < offset and src[offset - width - 1] != '\n') width += 1;
    return width;
}
// Whether the comment beginning at `start` trails other content: true when a
// byte other than space, tab, or carriage return precedes it on the same line.
// False means the comment is standalone and has its line to itself.
fn commentIsTrailing(src: []const u8, start: usize) bool {
    var pos = start;
    while (pos > 0) : (pos -= 1) {
        switch (src[pos - 1]) {
            // Reached the line start having seen only whitespace.
            '\n' => return false,
            ' ', '\t', '\r' => {},
            else => return true,
        }
    }
    return false;
}
// Whether the half-open byte range [from, to) is free of newlines, i.e. both
// offsets lie on one physical line. A reversed range, or one that runs past the
// end of `src`, answers false.
fn noNewlineBetween(src: []const u8, from: usize, to: usize) bool {
    if (from > to or to > src.len) return false;
    for (src[from..to]) |byte| {
        if (byte == '\n') return false;
    }
    return true;
}
// The lead anchor of a top-level item: a source slice on the first line of its
// rendered form, counting a leading doc comment as part of the item. It locates
// the comments that must be flushed before the item.
fn itemLeadAnchor(it: ast.Item) ?[]const u8 {
    switch (it) {
        .use_decl => |use| return use.module,
        .link_decl => |link| return link.module,
        // A documented declaration starts at its first doc line.
        .const_decl => |decl| {
            if (decl.doc.len > 0) return decl.doc[0];
            return decl.name;
        },
        .fn_decl => |func| {
            if (func.doc.len > 0) return func.doc[0];
            return func.name;
        },
        // A comptime block anchors on its first statement; empty, it has none.
        .comptime_block => |body| {
            if (body.len == 0) return null;
            return stmtAnchor(body[0]);
        },
        // The quoted name lexeme is a source slice on the head line.
        .test_decl => |decl| return decl.name,
    }
}
// The tail anchor of a top-level item: a slice on the declaration's own first
// line, after any doc comment. A trailing comment on that same line attaches
// through it. A comptime block has no tail anchor.
fn itemTailAnchor(it: ast.Item) ?[]const u8 {
    switch (it) {
        .use_decl => |use| return use.module,
        .link_decl => |link| return link.module,
        .const_decl => |decl| return decl.name,
        .fn_decl => |func| return func.name,
        .comptime_block => return null,
        .test_decl => |decl| return decl.name,
    }
}
// The lead anchor of a container's associated declaration, as for a top-level
// item: the first doc line when the declaration is documented, else its name;
// an import anchors on its module.
fn declLeadAnchor(d: ast.ContainerDecl) ?[]const u8 {
    switch (d) {
        .method => |method| {
            if (method.doc.len > 0) return method.doc[0];
            return method.name;
        },
        .constant => |constant| {
            if (constant.doc.len > 0) return constant.doc[0];
            return constant.name;
        },
        .use_import => |import| return import.module,
    }
}
// The tail anchor of a container's associated declaration: the declaration's
// name (its module, for an import), never a doc line.
fn declTailAnchor(d: ast.ContainerDecl) ?[]const u8 {
    switch (d) {
        .method => |method| return method.name,
        .constant => |constant| return constant.name,
        .use_import => |import| return import.module,
    }
}
// The boundary anchor of an `else` arm: the anchor of its first statement. An
// empty arm (`else {}`) has no statement to index, so it answers null and the
// caller keeps the enclosing boundary.
fn elseAnchor(eb: []ast.Stmt) ?[]const u8 {
    if (eb.len == 0) return null;
    return stmtAnchor(eb[0]);
}
// The boundary anchor of a switch prong: its first pattern when it has one.
// A prong without patterns (the `else` prong) falls back to its body — the
// first statement for a block body (as elseAnchor does), otherwise the body
// expression. A null result leaves the whole switch's boundary in force.
fn prongAnchor(p: ast.SwitchProng) ?[]const u8 {
    if (p.patterns.len == 0) {
        if (p.body == .block_expr) return elseAnchor(p.body.block_expr.body);
        return exprAnchor(p.body);
    }
    return exprAnchor(p.patterns[0].lo);
}
// A representative source slice on a statement's first physical line, used to
// recover its position for blank-line preservation. Null for the forms that
// store no anchor — a bare `break` / `continue`, an empty defer block, or any
// statement whose leading expression has no anchor — which simply take no
// preserved blank.
//
// Every slice that is indexed here is length-checked first, so an empty list
// yields null rather than an out-of-bounds index.
fn stmtAnchor(s: ast.Stmt) ?[]const u8 {
    return switch (s) {
        .discard => |x| exprAnchor(x),
        .bind => |b| b.name,
        // A destructure anchors on its first real name — a `_` skip stores no
        // source slice, and the same-line comma rule keeps every name on the
        // statement's first line anyway.
        .destructure => |d| for (d.names) |maybe| {
            if (maybe) |name| break name;
        } else null,
        .assign => |a| exprAnchor(a.target),
        // Guarded like the other first-element lookups in this switch: an empty
        // target list has no anchor.
        .destructure_assign => |da| if (da.targets.len > 0) exprAnchor(da.targets[0]) else null,
        .if_stmt => |iff| exprAnchor(iff.cond),
        .while_stmt => |w| exprAnchor(w.cond),
        // A loop anchors on its first capture, or on the iterated expression
        // when it has no captures.
        .for_stmt => |fr| if (fr.captures.len > 0) fr.captures[0] else exprAnchor(fr.iter),
        // The single-statement defer forms take the deferred statement's anchor.
        .defer_stmt => |inner| stmtAnchor(inner.*),
        .errdefer_stmt => |inner| stmtAnchor(inner.*),
        // The block forms anchor on their first statement (like a top-level
        // comptime block); an empty block has no anchor.
        .defer_block, .errdefer_block => |stmts| if (stmts.len > 0) stmtAnchor(stmts[0]) else null,
        .expr => |x| exprAnchor(x),
    };
}
// The leftmost source slice of an expression (recursing into the head of a
// postfix / binary chain), or null for the forms whose head is a keyword or a
// synthesized node. Used only to locate a statement's first line.
//
// Every list that is indexed here is length-checked first, so an empty list
// yields null rather than an out-of-bounds index.
fn exprAnchor(e: ast.Expr) ?[]const u8 {
    return switch (e) {
        // Leaf forms: the stored slice is the anchor.
        .int, .float, .string, .char, .ident, .value_word, .enum_lit, .error_lit => |s| s,
        .multiline_str => |lines| if (lines.len > 0) lines[0] else null,
        // Postfix forms anchor on their base / callee, the leftmost operand.
        .member => |m| exprAnchor(m.base.*),
        .deref => |d| exprAnchor(d.*),
        .optional_unwrap => |u| exprAnchor(u.*),
        .call => |c| exprAnchor(c.callee.*),
        .index => |ix| exprAnchor(ix.base.*),
        .slice => |s| exprAnchor(s.base.*),
        .builtin_call => |b| b.name,
        // A prefix operator is itself the leftmost slice.
        .unary => |u| u.op,
        .binary => |b| exprAnchor(b.lhs.*),
        .group => |g| exprAnchor(g.*),
        // Keyword-led forms fall through to the first operand after the keyword.
        .if_expr => |iff| exprAnchor(iff.cond.*),
        .switch_expr => |sw| exprAnchor(sw.subject.*),
        .try_expr => |t| exprAnchor(t.*),
        .catch_expr => |c| exprAnchor(c.lhs.*),
        .typed_lit => |tl| exprAnchor(tl.type.*),
        // A `.{ … }` literal leads with `.{`, which is not a stored slice; use
        // its first field's name or value so a `return .{ … }` / `_ = .{ … }`
        // statement still has an anchor (without one, a leading comment would be
        // pushed past the statement instead of in front of it).
        .struct_lit => |fields| if (fields.len == 0)
            null
        else if (fields[0].name) |n| n else exprAnchor(fields[0].value),
        .error_set => |names| if (names.len > 0) names[0] else null,
        // A `return` anchors on its first value. The value list is
        // length-checked like the other lists above, so an empty one gives no
        // anchor instead of indexing past the end.
        .ret => |m| if (m) |vals| (if (vals.len > 0) exprAnchor(vals[0]) else null) else null,
        .brk => |b| if (b.value) |v| exprAnchor(v.*) else null,
        else => null,
    };
}
// --- tests ---------------------------------------------------------------
const testing = std.testing;
// Test helper: parses `src` into a Program with a fresh Parser that allocates
// from `arena`, passing any parse error through.
fn parseProg(arena: std.mem.Allocator, src: []const u8) parser.Error!ast.Program {
    var state = Parser.init(arena, src);
    const program = try state.parseProgram();
    return program;
}
// Collects the lexeme of every line-comment token in `src` and returns them
// sorted with lessStr, so two sources can be compared as comment multisets.
// The result is allocated from `arena`.
fn sortedComments(arena: std.mem.Allocator, src: []const u8) ![]const []const u8 {
    var comments: std.ArrayList([]const u8) = .empty;
    var lexer = Lexer.init(src);

    // Pull tokens until end of input, keeping only the line comments.
    var tok = lexer.next();
    while (tok.kind != .eof) : (tok = lexer.next()) {
        if (tok.kind != .line_comment) continue;
        try comments.append(arena, tok.lexeme(src));
    }

    const sorted = try comments.toOwnedSlice(arena);
    std.mem.sort([]const u8, sorted, {}, lessStr);
    return sorted;
}
// Sort predicate for byte strings: true when `a` orders strictly before `b`.
// The context argument is unused.
fn lessStr(_: void, a: []const u8, b: []const u8) bool {
    return std.mem.order(u8, a, b) == .lt;
}
// The three gates every fixture must pass, with or without comments:
//   1. formatting leaves the lowered Zig unchanged —
//      lower(parse(src)) == lower(parse(fmt(src)));
//   2. formatting is idempotent — fmt(fmt(src)) == fmt(src);
//   3. the comments of the input and of the output are the same multiset.
// All intermediate text lives in an arena that is released on return.
fn expectStable(src: []const u8) !void {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    // Gate 1: lowering is blind to the formatter's changes.
    const program_before = try parseProg(arena, src);
    const zig_before = try lower.emit(arena, program_before);
    const once = try format(arena, src);
    const program_after = try parseProg(arena, once);
    const zig_after = try lower.emit(arena, program_after);
    try testing.expectEqualStrings(zig_before, zig_after);

    // Gate 2: a second pass changes nothing.
    const twice = try format(arena, once);
    try testing.expectEqualStrings(once, twice);

    // Gate 3: same count, then pairwise equality of the sorted comments.
    const comments_in = try sortedComments(arena, src);
    const comments_out = try sortedComments(arena, once);
    try testing.expectEqual(comments_in.len, comments_out.len);
    for (comments_in, 0..) |comment, i| {
        try testing.expectEqualStrings(comment, comments_out[i]);
    }
}
// Asserts that formatting `src` produces exactly `want`. The formatted text is
// built in a scratch arena that is released on return.
fn expectFormat(src: []const u8, want: []const u8) !void {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const got = try format(arena_state.allocator(), src);
    try testing.expectEqualStrings(want, got);
}
test "hello: imports, links, an exported entry, binds and calls" {
// A plain untyped immutable binding renders in the `:=` short-declaration
// canon, whether the source wrote `:=` or `const x =`.
// Everything else in the fixture is expected back unchanged, and the expected
// text ends with a newline (the final empty `\\` line) that the input lacks.
try expectFormat(
\\use flibc
\\
\\link "flibc_start"
\\link "flibc_mem"
\\
\\export fn main(_ usize, _ argv) noreturn {
\\ const msg = "hello from flash\n"
\\ _ = flibc.sys.write_fd(1, msg.ptr, msg.len)
\\ flibc.exit()
\\}
,
\\use flibc
\\
\\link "flibc_start"
\\link "flibc_mem"
\\
\\export fn main(_ usize, _ argv) noreturn {
\\ msg := "hello from flash\n"
\\ _ = flibc.sys.write_fd(1, msg.ptr, msg.len)
\\ flibc.exit()
\\}
\\
);
}
test "types: pointer, slice, sentinel, optional, error-union, fn-type spellings round-trip" {
// Runs the expectStable gates over a fixture that uses `*T` / `*mut T`,
// `[]T` / `[]mut T`, a sentinel many-pointer, an optional many-pointer
// return, `*fn(…)` struct fields, and a named error-union return.
try expectStable(
\\fn pass(p *u32, q *mut u32, m []u8, w []mut u8, s [*:0]u8) *u32 {
\\ return q
\\}
\\
\\const VTable = struct {
\\ alloc *fn(*mut anyopaque, usize) ?[*]mut u8,
\\ free *fn(*mut anyopaque, []mut u8) void,
\\}
\\
\\fn dup(path cstr) AllocError!i32 {
\\ return error.OutOfMemory
\\}
);
}
test "control flow: if/else-if, while-capture, range-for, switch, defer round-trip" {
// Runs the expectStable gates over one function that nests an if / else-if /
// else chain in a range `for`, then a capturing `while`, then a `switch` with
// single, multi-pattern, and `else` prongs.
try expectStable(
\\fn run(n usize) void {
\\ for i in 0..n {
\\ if i == 0 {
\\ continue
\\ } else if i == 1 {
\\ defer cleanup()
\\ } else {
\\ work(i)
\\ }
\\ }
\\ while it.next() |x| {
\\ _ = x
\\ }
\\ switch tag {
\\ 0 => low(),
\\ 1, 2 => mid(),
\\ else => high(),
\\ }
\\}
);
}
test "containers: struct with fields and a method, enum, tagged union round-trip" {
// Runs the expectStable gates over the three container kinds: a struct with a
// defaulted field and a method, an enum with a tag type and an explicit
// value, and a `union(enum)` mixing a bare and a typed variant.
try expectStable(
\\const Point = struct {
\\ x i32,
\\ y i32 = 0,
\\
\\ fn sum(self Point) i32 {
\\ return self.x + self.y
\\ }
\\}
\\
\\const Color = enum(u8) {
\\ red,
\\ green = 5,
\\ blue,
\\}
\\
\\const Tok = union(enum) {
\\ eof,
\\ int usize,
\\}
);
}
test "enum and union bodies with methods, constants, and imports round-trip" {
// Runs the expectStable gates over an enum body that carries a `use` import,
// a typed constant, and a doc-commented `pub fn` after its variants, and a
// union body that carries a method.
try expectStable(
\\const Color = enum(u8) {
\\ red,
\\ green = 5,
\\
\\ use "names" as names
\\
\\ const COUNT usize = 2
\\
\\ /// the canonical default
\\ pub fn default() Color {
\\ return .red
\\ }
\\}
\\
\\const Tok = union(enum) {
\\ eof,
\\ int usize,
\\
\\ fn isEof(self Tok) bool {
\\ return self == .eof
\\ }
\\}
);
}
test "expressions: builtins, logical operators, casts, struct literals round-trip" {
// Runs the expectStable gates over `&&` / `||`, a `#` builtin call, a typed
// literal with named fields, an anonymous positional literal, and `orelse`.
try expectStable(
\\fn f(a bool, b bool) usize {
\\ if a && b || c {
\\ return #intCast(x)
\\ }
\\ p := P{ .x = 1, .y = 2 }
\\ q := .{ 1, 2, 3 }
\\ return value orelse 0
\\}
);
}
test "doc comments are preserved on the declaration they lead" {
// Runs the expectStable gates over a two-line `///` doc on a constant and a
// one-line doc on a function.
try expectStable(
\\/// the maximum
\\/// width
\\pub const MAX = 80
\\
\\/// add two numbers
\\fn add(a i32, b i32) i32 {
\\ return a + b
\\}
);
}
test "blank lines between statements are preserved, collapsed to one" {
// Two blank lines between `a()` and `b()` come back as one; `b()` and `c()`,
// adjacent in the source, stay adjacent.
try expectFormat(
\\fn f() void {
\\ a()
\\
\\
\\ b()
\\ c()
\\}
,
\\fn f() void {
\\ a()
\\
\\ b()
\\ c()
\\}
\\
);
}
test "value if-expression keeps its parentheses" {
// Runs the expectStable gates over `return if (c) 1 else 2`, the
// parenthesized value form of `if`.
try expectStable(
\\fn pick(c bool) usize {
\\ return if (c) 1 else 2
\\}
);
}
test "the := canon: a plain bind rewrites; typed, var, comptime keep their keyword" {
// Only the untyped `const b = 2` is rewritten (to `b := 2`); the typed const,
// the `var`, and the `comptime const` are expected back as written.
try expectFormat(
\\fn f() void {
\\ const b = 2
\\ const c i32 = 3
\\ var d = 4
\\ comptime const e = 5
\\}
,
\\fn f() void {
\\ b := 2
\\ const c i32 = 3
\\ var d = 4
\\ comptime const e = 5
\\}
\\
);
}
test "the := canon round-trips and stays stable across binding kinds" {
// Runs the expectStable gates over a mix of `:=`, untyped `const`, typed
// `const`, `var`, and a typed `const` with an `align(…)` clause.
try expectStable(
\\fn f() void {
\\ a := compute()
\\ const b = other()
\\ const c usize = 3
\\ var d = 4
\\ const g usize align(16) = 6
\\}
);
}
test "a standalone comment leads a statement; a trailing one rides its line" {
// The expected text is the input plus a final newline: the standalone
// comment stays above its statement, the trailing one stays on its line.
try expectFormat(
\\fn f() void {
\\ // compute the sum
\\ s := a + b // the running total
\\ return s
\\}
,
\\fn f() void {
\\ // compute the sum
\\ s := a + b // the running total
\\ return s
\\}
\\
);
}
test "trailing comments ride enum variants" {
// Each variant's same-line comment is expected to stay after that variant's
// comma; the expected text is the input plus a final newline.
try expectFormat(
\\const Kind = enum {
\\ command, // the first token
\\ path, // a later token
\\}
,
\\const Kind = enum {
\\ command, // the first token
\\ path, // a later token
\\}
\\
);
}
test "a file-header block and a doc comment are both preserved" {
// A two-line `//` header, a blank line, then a `///` doc on a constant: all
// are expected back in place, with a final newline added.
try expectFormat(
\\// header line one
\\// header line two
\\
\\/// a doc
\\pub const MAX = 80
,
\\// header line one
\\// header line two
\\
\\/// a doc
\\pub const MAX = 80
\\
);
}
test "a blank line before a top-level comptime block is preserved" {
// The blank line between the function and the `comptime { … }` block is
// expected to survive; the expected text is the input plus a final newline.
try expectFormat(
\\fn shim() void {
\\ work()
\\}
\\
\\comptime {
\\ #export(&shim, .{ .name = "_start" })
\\}
,
\\fn shim() void {
\\ work()
\\}
\\
\\comptime {
\\ #export(&shim, .{ .name = "_start" })
\\}
\\
);
}
test "a module-head //! comment leads the file" {
// The `//!` line and the blank line after it are expected to stay ahead of
// the `use`; the expected text is the input plus a final newline.
try expectFormat(
\\//! module documentation
\\
\\use flibc
,
\\//! module documentation
\\
\\use flibc
\\
);
}
test "a comment-only file emits its comments" {
// With no declarations at all, both comment lines are still expected in the
// output, followed by a final newline.
try expectFormat(
\\// just a comment
\\// and another
,
\\// just a comment
\\// and another
\\
);
}
test "a block-final comment stays inside the block" {
// A comment after the last statement is expected to remain before the
// closing brace rather than move out of the function body.
try expectFormat(
\\fn f() void {
\\ work()
\\ // trailing note inside the block
\\}
,
\\fn f() void {
\\ work()
\\ // trailing note inside the block
\\}
\\
);
}
test "consecutive top-level declarations keep the author's blank-line grouping" {
// `A` and `B` are adjacent in the source and expected to stay adjacent; the
// blank line before `C` is expected to stay too.
try expectFormat(
\\pub const A = x.A
\\pub const B = x.B
\\
\\pub const C = x.C
,
\\pub const A = x.A
\\pub const B = x.B
\\
\\pub const C = x.C
\\
);
}
test "a comment leads a return-struct-literal statement, not pushed past it" {
// A `return .{ … }` statement begins with a literal that stores no leading
// slice; the comment above it is still expected to stay above it.
try expectFormat(
\\fn f() T {
\\ // build the result
\\ return .{ .key = .none }
\\}
,
\\fn f() T {
\\ // build the result
\\ return .{ .key = .none }
\\}
\\
);
}
test "a trailing comment on a method's statement stays in that method" {
// Method `a` ends in a bare `return`; the trailing comment in method `b` is
// expected to stay on its own statement in `b`.
try expectFormat(
\\const S = struct {
\\ fn a(self S) void {
\\ return
\\ }
\\
\\ fn b(self S) void {
\\ v := f() // a trailing note
\\ }
\\}
,
\\const S = struct {
\\ fn a(self S) void {
\\ return
\\ }
\\
\\ fn b(self S) void {
\\ v := f() // a trailing note
\\ }
\\}
\\
);
}
test "comment-rich source: every comment survives, output is stable" {
// Runs the expectStable gates over a fixture with a file comment, a trailing
// comment on a `use`, a doc comment, and standalone / trailing comments at
// two nesting depths — chiefly exercising the comment-multiset gate.
try expectStable(
\\// a leading file comment
\\
\\use flibc // the C runtime
\\
\\/// the entry
\\export fn main(_ usize, _ argv) noreturn {
\\ // set up
\\ n := count() // how many
\\ for i in 0..n {
\\ // each iteration
\\ step(i)
\\ }
\\ // tear down
\\ flibc.exit()
\\}
);
}
test "composite-type alias declarations round-trip" {
// Runs the expectStable gates over constants whose value is a type (function
// pointer, optional, slice, mutable pointer) and a parameter of generic type
// `Get([]u8)`.
try expectStable(
\\const F = *fn(u8) u8
\\const O = ?u8
\\const S = []u8
\\const M = *mut fn() void
\\
\\fn take(g Get([]u8)) void {
\\ _ = g
\\}
);
}
test "defer/errdefer block form round-trips, comments riding inside" {
// Runs the expectStable gates over a `defer { … }` block that opens with a
// comment, an `errdefer { … }` block, and a single-statement `defer`.
try expectStable(
\\fn run(fd i32) !void {
\\ defer {
\\ // release in reverse order
\\ close(fd)
\\ close(fd + 1)
\\ }
\\ errdefer {
\\ close(0)
\\ }
\\ defer close(fd)
\\ return
\\}
);
}
test "test blocks round-trip, comments riding inside" {
// Runs the expectStable gates over a file comment, a `test` block with a
// comment inside its body, and an empty `test "empty" {}`.
try expectStable(
\\// suite header
\\use std
\\
\\test "first" {
\\ // inside the body
\\ n := 1
\\ _ = n
\\}
\\
\\test "empty" {}
);
}
test "loop else arms and the if else-capture round-trip, comments riding inside" {
// Runs the expectStable gates over `if` and `while` with a capturing
// `else |err|` arm, and `while` / `for` with a plain `else` arm, with
// comments placed inside an else arm and inside a loop body.
try expectStable(
\\fn f(xs []u8, c bool) void {
\\ if next() |v| {
\\ consume(v)
\\ } else |err| {
\\ // the failure arm
\\ log(err)
\\ }
\\ while next() |v| {
\\ consume(v)
\\ } else |err| {
\\ log(err)
\\ }
\\ while c {
\\ // body comment stays in the body
\\ step()
\\ } else {
\\ done()
\\ }
\\ for x in xs {
\\ consume(x)
\\ } else {
\\ done()
\\ }
\\}
);
}
test "inline loops round-trip: inline for across its shapes, inline while unchanged" {
// Runs the expectStable gates over `inline for` with one capture, over a
// range with an `else` arm, with two captures and a body comment, and over
// an `inline while`.
try expectStable(
\\fn f(xs []u8, n usize) void {
\\ inline for x in xs {
\\ consume(x)
\\ }
\\ inline for i in 0..n {
\\ consume(i)
\\ } else {
\\ done()
\\ }
\\ inline for x, i in xs {
\\ // comment rides the unrolled body
\\ consume(i)
\\ }
\\ inline while n > 0 {
\\ step()
\\ }
\\}
);
}
test "an empty else arm round-trips (the elseAnchor guard)" {
// `else {}` has no first statement to anchor the boundary on; the guard
// keeps the enclosing boundary instead of indexing the empty arm.
// The fixture pairs an empty body with an empty `else {}` on each of `if`,
// `while`, and `for`.
try expectStable(
\\fn f(xs []u8, c bool) void {
\\ if c {} else {}
\\ while c {} else {}
\\ for x in xs {} else {}
\\}
);
}
test "a block-bodied prong does not adopt a later prong's standalone comment" {
// Without the per-prong boundary narrowing, prong `.a`'s block-close
// flush was bounded by the whole switch and pulled `.b`'s interior
// comment back to the end of `.a`'s body.
// Checked through the expectStable gates: a comment that moved between
// prongs would change on a second formatting pass.
try expectStable(
\\fn f(k Kind) void {
\\ switch k {
\\ .a => {
\\ work()
\\ },
\\ .b => {
\\ // interior comment of prong b
\\ more()
\\ },
\\ }
\\}
);
}
test "a block-bodied prong does not adopt a later prong's trailing comments" {
// The adoption was not limited to standalone comments: trailing comments
// on statements inside a later block-bodied prong were pulled back too,
// degraded to standalone lines at the end of the earlier prong's body.
// The fixture nests the commented statements one level deeper, inside an
// `if` within prong `.b`.
try expectStable(
\\fn f(k Kind) void {
\\ switch k {
\\ .a => {
\\ work()
\\ },
\\ .b => {
\\ if cond {
\\ x() // trailing on x
\\ y() // trailing on y
\\ }
\\ },
\\ }
\\}
);
}
test "a leading comment inside a block-bodied prong stays inside" {
// The inverse drift: a standalone comment as the first line inside a
// prong's block was re-sited out of the prong when an earlier
// block-bodied prong's close flush reached past the prong header.
// The fixture covers both a capturing prong (`.b => |w|`) and the `else`
// prong, which has no pattern to anchor on.
try expectStable(
\\fn f(k Kind) void {
\\ switch k {
\\ .a => {
\\ work()
\\ },
\\ .b => |w| {
\\ // leading interior comment
\\ if w {
\\ x()
\\ }
\\ },
\\ else => {
\\ // the else prong holds its own comment too
\\ done()
\\ },
\\ }
\\}
);
}
test "tuple types and multi-return round-trip, comments riding inside" {
// Runs the expectStable gates over a tuple type alias, a tuple return type
// with the `return a, b` sugar, a `return .{ … }` literal, and a nested tuple
// parameter indexed inside a parenthesized expression.
try expectStable(
\\const Pair = (u8, bool)
\\
\\fn pair() (u8, bool) {
\\ // both spellings hold
\\ return 42, true
\\}
\\
\\fn lit() Pair {
\\ return .{ 7, false }
\\}
\\
\\fn first(t (u8, (u8, bool))) u8 {
\\ return (t[0] + t[1][0]) * 1
\\}
);
}
test "a tuple type's trailing comma drops to the canonical spelling" {
// `(u8, bool,)` in the source is expected back as `(u8, bool)`.
try expectFormat(
\\fn pair() (u8, bool,) {
\\ return 42, true
\\}
,
\\fn pair() (u8, bool) {
\\ return 42, true
\\}
\\
);
}
test "destructures round-trip, comments riding inside" {
// Runs the expectStable gates over `:=` destructures with a `_` skip in
// either position, a `var` destructure, and destructuring assignments to
// plain names and to an indexed target, one with a trailing comment.
try expectStable(
\\fn pair() (u8, bool) {
\\ return 42, true
\\}
\\
\\fn demo() void {
\\ // both skips hold
\\ tok, _ := pair()
\\ _, ok := pair()
\\ var x, y = pair()
\\ x, y = pair() // the assignment list is verbatim
\\ arr[0], y = pair()
\\ _ = tok
\\ _ = ok
\\ _ = x
\\}
);
}
test "the ':=' canon extends to destructures: 'const' rewrites, 'var' keeps its keyword" {
// `const a, b = pair()` is expected back as `a, b := pair()`; the `var`
// destructure and the discard statement are expected back as written.
try expectFormat(
\\fn demo() void {
\\ const a, b = pair()
\\ var x, y = pair()
\\ _ = .{ a, b, x, y }
\\}
,
\\fn demo() void {
\\ a, b := pair()
\\ var x, y = pair()
\\ _ = .{ a, b, x, y }
\\}
\\
);
}