// Source: ajhahn.de · Flash · Zig, 191 lines
// Token taxonomy for Flash. The set covers the whole v1 surface and no more —
// module imports and declarations, the control-flow / error / comptime
// keywords, the operator and compound-assignment families, the four literal
// forms, and the comment and doc-comment trivia. It is the single source of
// truth the lexer and parser fan out from; new syntax adds a variant here first.

const std = @import("std");

/// Every kind of token in the Flash token taxonomy: literals and names,
/// keywords, punctuation, operators, compound assignments, and the two control
/// kinds (`eof`, `invalid`). The section comments below group the variants.
///
/// The `kw_*` variants are the values of the `keywords` table in this file,
/// which maps each reserved spelling onto its kind.
///
/// No explicit tag values are given, so declaration order determines each
/// variant's integer tag. NOTE(review): nothing visible here depends on that
/// order, but confirm before reordering or inserting variants mid-list.
pub const Kind = enum {
    // literals + names
    ident, // a name — NOTE(review): presumably any word not found in `keywords`; confirm in the lexer
    int, // integer literal
    float, // decimal float literal, e.g. 3.14 or 1.5e-3; value passes through to Zig verbatim
    string, // string literal — NOTE(review): presumably the quoted form, as opposed to `multiline_str`; confirm in the lexer
    multiline_str, // a `\\…` raw multiline-string line (one per physical line)
    char, // 'c'
    builtin, // #name(...)
    doc_comment, // `///…` doc-comment line (content-bearing; kept and re-emitted)
    line_comment, // `//…` line comment in any non-doc shape (`//`, `////`, `//!`); kept for the formatter, filtered out before the parse

    // keywords
    // Each `kw_*` variant below has a matching spelling in the `keywords` table.
    kw_use,
    kw_as,
    kw_link,
    kw_fn,
    kw_export,
    kw_extern,
    kw_callconv,
    kw_align,
    kw_pub,
    kw_inline,
    kw_comptime,
    kw_const,
    kw_var,
    kw_orelse,
    kw_if,
    kw_else,
    kw_while,
    kw_for,
    kw_in,
    kw_break,
    kw_continue,
    kw_return,
    kw_try,
    kw_catch,
    kw_defer,
    kw_errdefer,
    kw_struct,
    kw_enum,
    kw_union,
    kw_switch,
    kw_asm,
    kw_error,
    kw_test, // `test "name" { … }` — a top-level test-block declaration
    // value keywords — the reserved literal words. They parse only in value
    // position (parsePrimary), never as a bindable identifier, so `true`,
    // `false`, `null`, `undefined`, and `unreachable` cannot be shadowed by a
    // name and lower to the identical Zig keyword.
    kw_true,
    kw_false,
    kw_null,
    kw_undefined,
    kw_unreachable,
    // primitive-type keywords — reserved type-position words. Each names a Zig
    // primitive but, being reserved, cannot be shadowed by a binding: `noreturn`
    // (the empty return type), `anytype` (an inferred parameter type), and
    // `anyopaque` (an incomplete pointee type). All three lower verbatim.
    kw_noreturn,
    kw_anytype,
    kw_anyopaque,

    // punctuation
    l_paren,
    r_paren,
    l_brace,
    r_brace,
    l_bracket,
    r_bracket,
    comma,
    colon,
    colon_equal, // :=
    dot,
    equal,
    arrow, // ->
    fat_arrow, // => — a switch prong separator
    star, // *
    underscore, // a lone _

    // operators
    plus, // +
    plus_plus, // ++ — array / slice concatenation
    plus_percent, // +% — wrapping addition
    minus, // -
    minus_percent, // -% — wrapping subtraction
    slash, // /
    percent, // %
    star_percent, // *% — wrapping multiplication (the base `*` is `star`, in punctuation)
    eq_eq, // ==
    bang_eq, // !=
    lt, // <
    lt_eq, // <=
    lt_lt, // <<
    gt, // >
    gt_eq, // >=
    gt_gt, // >>
    amp, // &
    amp_amp, // &&
    pipe, // |
    pipe_pipe, // ||
    caret, // ^
    tilde, // ~
    bang, // !
    question, // ?
    dot_dot, // ..
    ellipsis3, // ... — an inclusive switch range (lo...hi)

    // compound assignment
    // One kind per operator-plus-`=` spelling. Only the ten listed here exist;
    // the wrapping operators (`+%`, `-%`, `*%`) and `++` have no assignment form.
    plus_eq, // +=
    minus_eq, // -=
    star_eq, // *=
    slash_eq, // /=
    percent_eq, // %=
    amp_eq, // &=
    pipe_eq, // |=
    caret_eq, // ^=
    lt_lt_eq, // <<=
    gt_gt_eq, // >>=

    // trivia / control
    // Only the two control kinds live here; the comment kinds (`doc_comment`,
    // `line_comment`) are declared with the literals at the top of the enum.
    eof, // end of input — NOTE(review): presumably emitted once after the last real token; confirm in the lexer
    invalid, // NOTE(review): presumably marks text the lexer could not classify; confirm in the lexer
};

/// One lexed token: its kind plus its position in the source text.
pub const Token = struct {
    /// Which member of the token taxonomy this is.
    kind: Kind,
    /// Half-open byte span `[start, end)` into the source buffer. Tokens carry
    /// offsets rather than copied slices, so the lexer allocates nothing and
    /// any token can point back at the original text for diagnostics.
    start: u32,
    end: u32,
    /// Source line recorded for the token.
    /// NOTE(review): whether numbering starts at 0 or 1 is not visible here.
    line: u32,

    /// Returns the token's text: the bytes of `src` covered by `[start, end)`.
    /// `src` is expected to be the buffer the offsets were measured against.
    pub fn lexeme(self: Token, src: []const u8) []const u8 {
        const first: usize = self.start;
        const past_last: usize = self.end;
        return src[first..past_last];
    }
};

/// Reserved words of Flash, keyed by their exact source spelling. Each entry
/// maps a spelling to the `kw_*` kind it stands for; a word with no entry here
/// is not a keyword. The table is built entirely at compile time.
pub const keywords = table: {
    // One row per reserved word: the spelling, then the kind it maps to.
    const Row = struct { []const u8, Kind };

    // Rows follow the declaration order of the `kw_*` variants in `Kind`.
    const rows = [_]Row{
        // keywords
        .{ "use", .kw_use },
        .{ "as", .kw_as },
        .{ "link", .kw_link },
        .{ "fn", .kw_fn },
        .{ "export", .kw_export },
        .{ "extern", .kw_extern },
        .{ "callconv", .kw_callconv },
        .{ "align", .kw_align },
        .{ "pub", .kw_pub },
        .{ "inline", .kw_inline },
        .{ "comptime", .kw_comptime },
        .{ "const", .kw_const },
        .{ "var", .kw_var },
        .{ "orelse", .kw_orelse },
        .{ "if", .kw_if },
        .{ "else", .kw_else },
        .{ "while", .kw_while },
        .{ "for", .kw_for },
        .{ "in", .kw_in },
        .{ "break", .kw_break },
        .{ "continue", .kw_continue },
        .{ "return", .kw_return },
        .{ "try", .kw_try },
        .{ "catch", .kw_catch },
        .{ "defer", .kw_defer },
        .{ "errdefer", .kw_errdefer },
        .{ "struct", .kw_struct },
        .{ "enum", .kw_enum },
        .{ "union", .kw_union },
        .{ "switch", .kw_switch },
        .{ "asm", .kw_asm },
        .{ "error", .kw_error },
        .{ "test", .kw_test },

        // value keywords
        .{ "true", .kw_true },
        .{ "false", .kw_false },
        .{ "null", .kw_null },
        .{ "undefined", .kw_undefined },
        .{ "unreachable", .kw_unreachable },

        // primitive-type keywords
        .{ "noreturn", .kw_noreturn },
        .{ "anytype", .kw_anytype },
        .{ "anyopaque", .kw_anyopaque },
    };

    break :table std.StaticStringMap(Kind).initComptime(rows);
};