ajhahn.de
← Flash
Flash 361 lines
// Token taxonomy for Flash. The set covers the whole v1 surface and no more —
// module imports and declarations, the control-flow / error / comptime
// keywords, the operator and compound-assignment families, the four literal
// forms, and the comment and doc-comment trivia. It is the single source of
// truth the lexer and parser fan out from; new syntax adds a variant here first.

use "support" as sup

// Kind is the tag stored in `Token.kind`: one variant per lexical category.
// Variants are grouped by role below. The `kw_*` variants are exactly the
// words `keywords.get` resolves from identifier text, listed in the same order.
pub const Kind = enum {
    // literals + names
    ident, // a name that `keywords.get` does not claim as a reserved word
    int, // integer literal
    float, // decimal float literal, e.g. 3.14 or 1.5e-3; value passes through to Zig verbatim
    string, // double-quoted string literal, e.g. "support"
    multiline_str, // a `\\…` raw multiline-string line (one per physical line)
    char, // 'c'
    builtin, // #name(...)
    doc_comment, // `///…` doc-comment line (content-bearing; kept and re-emitted)
    line_comment, // `//…` line comment in any non-doc shape (`//`, `////`, `//!`); kept for the formatter, filtered out before the parse

    // keywords
    kw_use, // `use "module" as name` — the module import form
    kw_as, // names the binding of a `use` import
    kw_link,
    kw_fn,
    kw_export,
    kw_extern,
    kw_callconv,
    kw_align,
    kw_linksection,
    kw_pub,
    kw_inline,
    kw_comptime,
    kw_const,
    kw_var,
    kw_orelse,
    kw_if,
    kw_else,
    kw_while,
    kw_for,
    kw_in,
    kw_break,
    kw_continue,
    kw_return,
    kw_try,
    kw_catch,
    kw_defer,
    kw_errdefer,
    kw_packed, // the layout modifier on a struct definition (`packed struct`)
    kw_struct,
    kw_enum,
    kw_union,
    kw_switch,
    kw_asm,
    kw_error,
    kw_test, // `test "name" { … }` — a top-level test-block declaration
    // value keywords — the reserved literal words. They parse only in value
    // position (parsePrimary), never as a bindable identifier, so `true`,
    // `false`, `null`, `undefined`, and `unreachable` cannot be shadowed by a
    // name and lower to the identical Zig keyword.
    kw_true,
    kw_false,
    kw_null,
    kw_undefined,
    kw_unreachable,
    // primitive-type keywords — reserved type-position words. Each names a Zig
    // primitive but, being reserved, cannot be shadowed by a binding: `noreturn`
    // (the empty return type), `anytype` (an inferred parameter type), and
    // `anyopaque` (an incomplete pointee type). All three lower verbatim.
    kw_noreturn,
    kw_anytype,
    kw_anyopaque,

    // punctuation
    l_paren, // (
    r_paren, // )
    l_brace, // {
    r_brace, // }
    l_bracket, // [
    r_bracket, // ]
    comma, // ,
    colon, // :
    colon_equal, // :=
    dot, // .
    equal, // =
    arrow, // ->
    fat_arrow, // => — a switch prong separator
    star, // *
    underscore, // a lone _

    // operators
    plus, // +
    plus_plus, // ++ — array / slice concatenation
    star_star, // ** — array repetition
    plus_percent, // +% — wrapping addition
    minus, // -
    minus_percent, // -% — wrapping subtraction
    slash, // /
    percent, // %
    star_percent, // *% — wrapping multiplication (the base `*` is `star`, in punctuation)
    eq_eq, // ==
    bang_eq, // !=
    lt, // <
    lt_eq, // <=
    lt_lt, // <<
    gt, // >
    gt_eq, // >=
    gt_gt, // >>
    amp, // &
    amp_amp, // &&
    pipe, // |
    pipe_pipe, // ||
    caret, // ^
    tilde, // ~
    bang, // !
    question, // ? — e.g. the optional-type prefix, as in `?Kind`
    dot_dot, // .. — e.g. slice bounds, as in `src[start..end]`
    ellipsis3, // ... — an inclusive switch range (lo...hi)

    // compound assignment
    plus_eq, // +=
    minus_eq, // -=
    star_eq, // *=
    slash_eq, // /=
    percent_eq, // %=
    amp_eq, // &=
    pipe_eq, // |=
    caret_eq, // ^=
    lt_lt_eq, // <<=
    gt_gt_eq, // >>=
    plus_percent_eq, // +%= — wrapping add-assign
    minus_percent_eq, // -%= — wrapping subtract-assign
    star_percent_eq, // *%= — wrapping multiply-assign

    // trivia / control
    eof, // end of input
    invalid, // presumably text the lexer could not classify — confirm against the lexer
}

// A single lexed token: its kind plus where it sits in the source text.
pub const Token = struct {
    kind Kind,
    // Half-open byte span [start, end) into the source buffer. Storing offsets
    // rather than copied text keeps the lexer allocation-free and lets any
    // token be traced back to the original bytes when reporting a diagnostic.
    start u32,
    end u32,
    // Source line of the token — presumably 1-based (the tests in this file
    // use 1); confirm against the lexer.
    line u32,

    // Returns the bytes of `src` that this token covers. `src` has to be the
    // buffer the span was measured against; nothing is copied.
    pub fn lexeme(self Token, src []u8) []u8 {
        const text []u8 = src[self.start..self.end]
        return text
    }
}

// Keyword lookup: a straight-line chain of string comparisons, one per
// reserved word, in the same order the `kw_*` variants appear in `Kind`.
// With only 43 words, frozen alongside the v1 grammar, nothing fancier pays
// for itself — `eql` bails out on a length mismatch before it compares bytes,
// so an ordinary identifier is almost always dismissed by length alone. The
// flat shape is also what the formatter lays out most cleanly, and a comptime
// string map would not earn its keep at this size.
pub const keywords = struct {
    // Returns the keyword kind spelled by `text`, or null when `text` is not a
    // reserved word. The match is on the whole of `text`, byte for byte.
    pub fn get(text []u8) ?Kind {
        // keywords
        if sup.eql(u8, text, "use") {
            return Kind.kw_use
        }
        if sup.eql(u8, text, "as") {
            return Kind.kw_as
        }
        if sup.eql(u8, text, "link") {
            return Kind.kw_link
        }
        if sup.eql(u8, text, "fn") {
            return Kind.kw_fn
        }
        if sup.eql(u8, text, "export") {
            return Kind.kw_export
        }
        if sup.eql(u8, text, "extern") {
            return Kind.kw_extern
        }
        if sup.eql(u8, text, "callconv") {
            return Kind.kw_callconv
        }
        if sup.eql(u8, text, "align") {
            return Kind.kw_align
        }
        if sup.eql(u8, text, "linksection") {
            return Kind.kw_linksection
        }
        if sup.eql(u8, text, "pub") {
            return Kind.kw_pub
        }
        if sup.eql(u8, text, "inline") {
            return Kind.kw_inline
        }
        if sup.eql(u8, text, "comptime") {
            return Kind.kw_comptime
        }
        if sup.eql(u8, text, "const") {
            return Kind.kw_const
        }
        if sup.eql(u8, text, "var") {
            return Kind.kw_var
        }
        if sup.eql(u8, text, "orelse") {
            return Kind.kw_orelse
        }
        if sup.eql(u8, text, "if") {
            return Kind.kw_if
        }
        if sup.eql(u8, text, "else") {
            return Kind.kw_else
        }
        if sup.eql(u8, text, "while") {
            return Kind.kw_while
        }
        if sup.eql(u8, text, "for") {
            return Kind.kw_for
        }
        if sup.eql(u8, text, "in") {
            return Kind.kw_in
        }
        if sup.eql(u8, text, "break") {
            return Kind.kw_break
        }
        if sup.eql(u8, text, "continue") {
            return Kind.kw_continue
        }
        if sup.eql(u8, text, "return") {
            return Kind.kw_return
        }
        if sup.eql(u8, text, "try") {
            return Kind.kw_try
        }
        if sup.eql(u8, text, "catch") {
            return Kind.kw_catch
        }
        if sup.eql(u8, text, "defer") {
            return Kind.kw_defer
        }
        if sup.eql(u8, text, "errdefer") {
            return Kind.kw_errdefer
        }
        if sup.eql(u8, text, "packed") {
            return Kind.kw_packed
        }
        if sup.eql(u8, text, "struct") {
            return Kind.kw_struct
        }
        if sup.eql(u8, text, "enum") {
            return Kind.kw_enum
        }
        if sup.eql(u8, text, "union") {
            return Kind.kw_union
        }
        if sup.eql(u8, text, "switch") {
            return Kind.kw_switch
        }
        if sup.eql(u8, text, "asm") {
            return Kind.kw_asm
        }
        if sup.eql(u8, text, "error") {
            return Kind.kw_error
        }
        if sup.eql(u8, text, "test") {
            return Kind.kw_test
        }

        // value keywords
        if sup.eql(u8, text, "true") {
            return Kind.kw_true
        }
        if sup.eql(u8, text, "false") {
            return Kind.kw_false
        }
        if sup.eql(u8, text, "null") {
            return Kind.kw_null
        }
        if sup.eql(u8, text, "undefined") {
            return Kind.kw_undefined
        }
        if sup.eql(u8, text, "unreachable") {
            return Kind.kw_unreachable
        }

        // primitive-type keywords
        if sup.eql(u8, text, "noreturn") {
            return Kind.kw_noreturn
        }
        if sup.eql(u8, text, "anytype") {
            return Kind.kw_anytype
        }
        if sup.eql(u8, text, "anyopaque") {
            return Kind.kw_anyopaque
        }

        // Not a reserved word.
        return null
    }
}

// Round-trips every reserved word through `keywords.get`, one assertion per
// `kw_*` variant and in the same order as the `Kind` enum, so a word added to
// the enum but missed in the lookup (or mapped to the wrong variant) shows up
// here.
test "every keyword maps to its kind" {
    // keywords
    try sup.expectEqual(Kind.kw_use, keywords.get("use"))
    try sup.expectEqual(Kind.kw_as, keywords.get("as"))
    try sup.expectEqual(Kind.kw_link, keywords.get("link"))
    try sup.expectEqual(Kind.kw_fn, keywords.get("fn"))
    try sup.expectEqual(Kind.kw_export, keywords.get("export"))
    try sup.expectEqual(Kind.kw_extern, keywords.get("extern"))
    try sup.expectEqual(Kind.kw_callconv, keywords.get("callconv"))
    try sup.expectEqual(Kind.kw_align, keywords.get("align"))
    try sup.expectEqual(Kind.kw_linksection, keywords.get("linksection"))
    try sup.expectEqual(Kind.kw_pub, keywords.get("pub"))
    try sup.expectEqual(Kind.kw_inline, keywords.get("inline"))
    try sup.expectEqual(Kind.kw_comptime, keywords.get("comptime"))
    try sup.expectEqual(Kind.kw_const, keywords.get("const"))
    try sup.expectEqual(Kind.kw_var, keywords.get("var"))
    try sup.expectEqual(Kind.kw_orelse, keywords.get("orelse"))
    try sup.expectEqual(Kind.kw_if, keywords.get("if"))
    try sup.expectEqual(Kind.kw_else, keywords.get("else"))
    try sup.expectEqual(Kind.kw_while, keywords.get("while"))
    try sup.expectEqual(Kind.kw_for, keywords.get("for"))
    try sup.expectEqual(Kind.kw_in, keywords.get("in"))
    try sup.expectEqual(Kind.kw_break, keywords.get("break"))
    try sup.expectEqual(Kind.kw_continue, keywords.get("continue"))
    try sup.expectEqual(Kind.kw_return, keywords.get("return"))
    try sup.expectEqual(Kind.kw_try, keywords.get("try"))
    try sup.expectEqual(Kind.kw_catch, keywords.get("catch"))
    try sup.expectEqual(Kind.kw_defer, keywords.get("defer"))
    try sup.expectEqual(Kind.kw_errdefer, keywords.get("errdefer"))
    try sup.expectEqual(Kind.kw_packed, keywords.get("packed"))
    try sup.expectEqual(Kind.kw_struct, keywords.get("struct"))
    try sup.expectEqual(Kind.kw_enum, keywords.get("enum"))
    try sup.expectEqual(Kind.kw_union, keywords.get("union"))
    try sup.expectEqual(Kind.kw_switch, keywords.get("switch"))
    try sup.expectEqual(Kind.kw_asm, keywords.get("asm"))
    try sup.expectEqual(Kind.kw_error, keywords.get("error"))
    try sup.expectEqual(Kind.kw_test, keywords.get("test"))
    // value keywords
    try sup.expectEqual(Kind.kw_true, keywords.get("true"))
    try sup.expectEqual(Kind.kw_false, keywords.get("false"))
    try sup.expectEqual(Kind.kw_null, keywords.get("null"))
    try sup.expectEqual(Kind.kw_undefined, keywords.get("undefined"))
    try sup.expectEqual(Kind.kw_unreachable, keywords.get("unreachable"))
    // primitive-type keywords
    try sup.expectEqual(Kind.kw_noreturn, keywords.get("noreturn"))
    try sup.expectEqual(Kind.kw_anytype, keywords.get("anytype"))
    try sup.expectEqual(Kind.kw_anyopaque, keywords.get("anyopaque"))
}

// Near-misses around `use` (plus an unrelated word and the empty string) must
// all come back null: the lookup matches whole words exactly, nothing looser.
test "non-keywords miss the table" {
    try sup.expectEqual(null, keywords.get("flash")) // an ordinary identifier
    try sup.expectEqual(null, keywords.get("us")) // a proper prefix of a keyword
    try sup.expectEqual(null, keywords.get("usee")) // a keyword with a trailing byte
    try sup.expectEqual(null, keywords.get("Use")) // differs from a keyword only in case
    try sup.expectEqual(null, keywords.get("kw_use")) // the variant's name, not its spelling
    try sup.expectEqual(null, keywords.get("")) // empty input
}

// A token in the middle of the buffer: `x` sits at byte 6, so its half-open
// span is [6, 7) and `lexeme` must hand back exactly that one byte.
test "lexeme slices the token's span out of the source" {
    const buffer []u8 = "const x = 1"
    const tok Token = .{ .kind = .ident, .start = 6, .end = 7, .line = 1 }
    const got []u8 = tok.lexeme(buffer)
    try sup.expectEqualStrings("x", got)
}

// Boundary case: a span that starts at byte 0 and ends at the buffer's length
// must return the entire buffer.
test "lexeme spans the whole buffer at the extremes" {
    const buffer []u8 = "use"
    const tok Token = .{ .kind = .kw_use, .start = 0, .end = 3, .line = 1 }
    const got []u8 = tok.lexeme(buffer)
    try sup.expectEqualStrings("use", got)
}