Flash 361 lines
// Token taxonomy for Flash. The set covers the whole v1 surface and no more —
// module imports and declarations, the control-flow / error / comptime
// keywords, the operator and compound-assignment families, the four literal
// forms, and the comment and doc-comment trivia. It is the single source of
// truth the lexer and parser fan out from; new syntax adds a variant here first.
use "support" as sup
// Every category of token in the Flash v1 surface. A `Token` carries exactly
// one of these in its `kind` field. Variants are grouped by role; each `kw_*`
// variant corresponds to one reserved word, and `keywords.get` maps the word's
// spelling (the variant name without the `kw_` prefix) to the variant.
pub const Kind = enum {
// literals, names, builtins, and comment trivia
ident, // a name that is not a reserved word
int, // integer literal
float, // decimal float literal, e.g. 3.14 or 1.5e-3; value passes through to Zig verbatim
string, // "…" string literal
multiline_str, // a `\\…` raw multiline-string line (one per physical line)
char, // 'c'
builtin, // #name(...)
doc_comment, // `///…` doc-comment line (content-bearing; kept and re-emitted)
line_comment, // `//…` line comment in any non-doc shape (`//`, `////`, `//!`); kept for the formatter, filtered out before the parse
// keywords — one variant per reserved word, in the same order `keywords.get`
// checks them
kw_use,
kw_as,
kw_link,
kw_fn,
kw_export,
kw_extern,
kw_callconv,
kw_align,
kw_linksection,
kw_pub,
kw_inline,
kw_comptime,
kw_const,
kw_var,
kw_orelse,
kw_if,
kw_else,
kw_while,
kw_for,
kw_in,
kw_break,
kw_continue,
kw_return,
kw_try,
kw_catch,
kw_defer,
kw_errdefer,
kw_packed, // the layout modifier on a struct definition (`packed struct`)
kw_struct,
kw_enum,
kw_union,
kw_switch,
kw_asm,
kw_error,
kw_test, // `test "name" { … }` — a top-level test-block declaration
// value keywords — the reserved literal words. They parse only in value
// position (parsePrimary), never as a bindable identifier, so `true`,
// `false`, `null`, `undefined`, and `unreachable` cannot be shadowed by a
// name and lower to the identical Zig keyword.
kw_true,
kw_false,
kw_null,
kw_undefined,
kw_unreachable,
// primitive-type keywords — reserved type-position words. Each names a Zig
// primitive but, being reserved, cannot be shadowed by a binding: `noreturn`
// (the empty return type), `anytype` (an inferred parameter type), and
// `anyopaque` (an incomplete pointee type). All three lower verbatim.
kw_noreturn,
kw_anytype,
kw_anyopaque,
// punctuation
l_paren, // (
r_paren, // )
l_brace, // {
r_brace, // }
l_bracket, // [
r_bracket, // ]
comma, // ,
colon, // :
colon_equal, // :=
dot, // .
equal, // =
arrow, // ->
fat_arrow, // => — a switch prong separator
star, // *
underscore, // a lone _
// operators
plus, // +
plus_plus, // ++ — array / slice concatenation
star_star, // ** — array repetition
plus_percent, // +% — wrapping addition
minus, // -
minus_percent, // -% — wrapping subtraction
slash, // /
percent, // %
star_percent, // *% — wrapping multiplication (the base `*` is `star`, in punctuation)
eq_eq, // ==
bang_eq, // !=
lt, // <
lt_eq, // <=
lt_lt, // <<
gt, // >
gt_eq, // >=
gt_gt, // >>
amp, // &
amp_amp, // &&
pipe, // |
pipe_pipe, // ||
caret, // ^
tilde, // ~
bang, // !
question, // ?
dot_dot, // ..
ellipsis3, // ... — an inclusive switch range (lo...hi)
// compound assignment
plus_eq, // +=
minus_eq, // -=
star_eq, // *=
slash_eq, // /=
percent_eq, // %=
amp_eq, // &=
pipe_eq, // |=
caret_eq, // ^=
lt_lt_eq, // <<=
gt_gt_eq, // >>=
plus_percent_eq, // +%= — wrapping add-assign
minus_percent_eq, // -%= — wrapping subtract-assign
star_percent_eq, // *%= — wrapping multiply-assign
// control — the two non-textual kinds (the comment trivia kinds sit in the
// first group). NOTE(review): presumably `eof` marks end of input and
// `invalid` marks text the lexer could not classify — confirm in the lexer.
eof,
invalid,
}
// One lexed token: which kind it is and where it sits in the source text.
pub const Token = struct {
    // The token's category.
    kind Kind,
    // Half-open byte span [start, end) into the source buffer. Storing
    // offsets rather than a copy of the text means the lexer never has to
    // allocate, and a diagnostic can always quote the original bytes.
    start u32,
    end u32,
    // Source line recorded for the token.
    line u32,
    // Returns the token's text: the bytes of `src` covered by [start, end).
    // `src` must be the same buffer the offsets were measured against.
    pub fn lexeme(self Token, src []u8) []u8 {
        const first u32 = self.start
        const past_last u32 = self.end
        return src[first..past_last]
    }
}
// The keyword table, as a flat linear scan. The set is small (43 words) and
// frozen with the v1 grammar, and `eql` rejects on length before it compares
// bytes, so the scan is cheap where it matters — identifiers that are not
// keywords fall through on length alone almost every time. A flat chain is
// also the shape the formatter lays out best; a comptime string map would buy
// nothing at this size.
//
// `keywords` is a namespace only: it declares no fields, just `get`. The
// checks run in the same order the `kw_*` variants are declared in `Kind`,
// one check per reserved word, so a new keyword is one new variant there and
// one new `if` here.
pub const keywords = struct {
// Maps the text of a scanned word to its keyword kind.
//   text — the candidate word's bytes.
// Returns the matching `kw_*` kind when `text` is exactly one of the reserved
// words, and null otherwise. The match is whole-word and case-sensitive: the
// tests in this file pin that "us", "usee", and "Use" do not match `use`.
pub fn get(text []u8) ?Kind {
// module, declaration, and control-flow keywords
if sup.eql(u8, text, "use") {
return .kw_use
}
if sup.eql(u8, text, "as") {
return .kw_as
}
if sup.eql(u8, text, "link") {
return .kw_link
}
if sup.eql(u8, text, "fn") {
return .kw_fn
}
if sup.eql(u8, text, "export") {
return .kw_export
}
if sup.eql(u8, text, "extern") {
return .kw_extern
}
if sup.eql(u8, text, "callconv") {
return .kw_callconv
}
if sup.eql(u8, text, "align") {
return .kw_align
}
if sup.eql(u8, text, "linksection") {
return .kw_linksection
}
if sup.eql(u8, text, "pub") {
return .kw_pub
}
if sup.eql(u8, text, "inline") {
return .kw_inline
}
if sup.eql(u8, text, "comptime") {
return .kw_comptime
}
if sup.eql(u8, text, "const") {
return .kw_const
}
if sup.eql(u8, text, "var") {
return .kw_var
}
if sup.eql(u8, text, "orelse") {
return .kw_orelse
}
if sup.eql(u8, text, "if") {
return .kw_if
}
if sup.eql(u8, text, "else") {
return .kw_else
}
if sup.eql(u8, text, "while") {
return .kw_while
}
if sup.eql(u8, text, "for") {
return .kw_for
}
if sup.eql(u8, text, "in") {
return .kw_in
}
if sup.eql(u8, text, "break") {
return .kw_break
}
if sup.eql(u8, text, "continue") {
return .kw_continue
}
if sup.eql(u8, text, "return") {
return .kw_return
}
if sup.eql(u8, text, "try") {
return .kw_try
}
if sup.eql(u8, text, "catch") {
return .kw_catch
}
if sup.eql(u8, text, "defer") {
return .kw_defer
}
if sup.eql(u8, text, "errdefer") {
return .kw_errdefer
}
if sup.eql(u8, text, "packed") {
return .kw_packed
}
if sup.eql(u8, text, "struct") {
return .kw_struct
}
if sup.eql(u8, text, "enum") {
return .kw_enum
}
if sup.eql(u8, text, "union") {
return .kw_union
}
if sup.eql(u8, text, "switch") {
return .kw_switch
}
if sup.eql(u8, text, "asm") {
return .kw_asm
}
if sup.eql(u8, text, "error") {
return .kw_error
}
if sup.eql(u8, text, "test") {
return .kw_test
}
// value keywords
if sup.eql(u8, text, "true") {
return .kw_true
}
if sup.eql(u8, text, "false") {
return .kw_false
}
if sup.eql(u8, text, "null") {
return .kw_null
}
if sup.eql(u8, text, "undefined") {
return .kw_undefined
}
if sup.eql(u8, text, "unreachable") {
return .kw_unreachable
}
// primitive-type keywords
if sup.eql(u8, text, "noreturn") {
return .kw_noreturn
}
if sup.eql(u8, text, "anytype") {
return .kw_anytype
}
if sup.eql(u8, text, "anyopaque") {
return .kw_anyopaque
}
// no check matched: `text` is not a reserved word
return null
}
}
// Exhaustive positive check: each of the 43 reserved words, listed in the
// order the `kw_*` variants are declared in `Kind`, must resolve to its own
// variant. Extend this list whenever a keyword is added to `Kind` and
// `keywords.get`.
test "every keyword maps to its kind" {
// module, declaration, and control-flow keywords
try sup.expectEqual(Kind.kw_use, keywords.get("use"))
try sup.expectEqual(Kind.kw_as, keywords.get("as"))
try sup.expectEqual(Kind.kw_link, keywords.get("link"))
try sup.expectEqual(Kind.kw_fn, keywords.get("fn"))
try sup.expectEqual(Kind.kw_export, keywords.get("export"))
try sup.expectEqual(Kind.kw_extern, keywords.get("extern"))
try sup.expectEqual(Kind.kw_callconv, keywords.get("callconv"))
try sup.expectEqual(Kind.kw_align, keywords.get("align"))
try sup.expectEqual(Kind.kw_linksection, keywords.get("linksection"))
try sup.expectEqual(Kind.kw_pub, keywords.get("pub"))
try sup.expectEqual(Kind.kw_inline, keywords.get("inline"))
try sup.expectEqual(Kind.kw_comptime, keywords.get("comptime"))
try sup.expectEqual(Kind.kw_const, keywords.get("const"))
try sup.expectEqual(Kind.kw_var, keywords.get("var"))
try sup.expectEqual(Kind.kw_orelse, keywords.get("orelse"))
try sup.expectEqual(Kind.kw_if, keywords.get("if"))
try sup.expectEqual(Kind.kw_else, keywords.get("else"))
try sup.expectEqual(Kind.kw_while, keywords.get("while"))
try sup.expectEqual(Kind.kw_for, keywords.get("for"))
try sup.expectEqual(Kind.kw_in, keywords.get("in"))
try sup.expectEqual(Kind.kw_break, keywords.get("break"))
try sup.expectEqual(Kind.kw_continue, keywords.get("continue"))
try sup.expectEqual(Kind.kw_return, keywords.get("return"))
try sup.expectEqual(Kind.kw_try, keywords.get("try"))
try sup.expectEqual(Kind.kw_catch, keywords.get("catch"))
try sup.expectEqual(Kind.kw_defer, keywords.get("defer"))
try sup.expectEqual(Kind.kw_errdefer, keywords.get("errdefer"))
try sup.expectEqual(Kind.kw_packed, keywords.get("packed"))
try sup.expectEqual(Kind.kw_struct, keywords.get("struct"))
try sup.expectEqual(Kind.kw_enum, keywords.get("enum"))
try sup.expectEqual(Kind.kw_union, keywords.get("union"))
try sup.expectEqual(Kind.kw_switch, keywords.get("switch"))
try sup.expectEqual(Kind.kw_asm, keywords.get("asm"))
try sup.expectEqual(Kind.kw_error, keywords.get("error"))
try sup.expectEqual(Kind.kw_test, keywords.get("test"))
// value keywords
try sup.expectEqual(Kind.kw_true, keywords.get("true"))
try sup.expectEqual(Kind.kw_false, keywords.get("false"))
try sup.expectEqual(Kind.kw_null, keywords.get("null"))
try sup.expectEqual(Kind.kw_undefined, keywords.get("undefined"))
try sup.expectEqual(Kind.kw_unreachable, keywords.get("unreachable"))
// primitive-type keywords
try sup.expectEqual(Kind.kw_noreturn, keywords.get("noreturn"))
try sup.expectEqual(Kind.kw_anytype, keywords.get("anytype"))
try sup.expectEqual(Kind.kw_anyopaque, keywords.get("anyopaque"))
}
// Negative check: words that only resemble a keyword must return null. Each
// case probes a different way a sloppy match could wrongly succeed.
test "non-keywords miss the table" {
// an ordinary identifier
try sup.expectEqual(null, keywords.get("flash"))
// a strict prefix of `use`
try sup.expectEqual(null, keywords.get("us"))
// `use` followed by one extra byte
try sup.expectEqual(null, keywords.get("usee"))
// right letters, wrong case
try sup.expectEqual(null, keywords.get("Use"))
// the enum variant's name rather than the word's spelling
try sup.expectEqual(null, keywords.get("kw_use"))
// empty input
try sup.expectEqual(null, keywords.get(""))
}
// A token in the middle of the buffer yields exactly its own bytes: the span
// [6, 7) of "const x = 1" is the identifier `x`.
test "lexeme slices the token's span out of the source" {
    const text []u8 = "const x = 1"
    const name_tok Token = .{ .kind = .ident, .start = 6, .end = 7, .line = 1 }
    const got []u8 = name_tok.lexeme(text)
    try sup.expectEqualStrings("x", got)
}
// Boundary case: a span that starts at offset 0 and ends at the buffer's
// length must return the entire buffer.
test "lexeme spans the whole buffer at the extremes" {
    const text []u8 = "use"
    const whole Token = .{ .kind = .kw_use, .start = 0, .end = 3, .line = 1 }
    const got []u8 = whole.lexeme(text)
    try sup.expectEqualStrings("use", got)
}