Zig 191 lines
// Token taxonomy for Flash. The set covers the whole v1 surface and no more —
// module imports and declarations, the control-flow / error / comptime
// keywords, the operator and compound-assignment families, the four literal
// forms, and the comment and doc-comment trivia. It is the single source of
// truth the lexer and parser fan out from; new syntax adds a variant here first.
const std = @import("std");
/// Every lexical category a Flash token can carry.
///
/// Variants are grouped by role (literals, keywords, punctuation, operators,
/// compound assignment, control). Their declaration order determines the
/// implicit integer tag values, so new variants should be added deliberately
/// rather than by reshuffling existing ones.
pub const Kind = enum {
    // ---- literals and names ----
    ident,
    int,
    // Decimal float literal (e.g. 3.14 or 1.5e-3); the value passes through
    // to Zig verbatim.
    float,
    string,
    // One `\\…` raw multiline-string line; one token per physical line.
    multiline_str,
    // Character literal: 'c'
    char,
    // Builtin call: #name(...)
    builtin,
    // A `///…` doc-comment line. Content-bearing: kept and re-emitted.
    doc_comment,
    // A `//…` line comment in any non-doc shape (`//`, `////`, `//!`). Kept
    // for the formatter and filtered out before the parse.
    line_comment,

    // ---- keywords ----
    kw_use,
    kw_as,
    kw_link,
    kw_fn,
    kw_export,
    kw_extern,
    kw_callconv,
    kw_align,
    kw_pub,
    kw_inline,
    kw_comptime,
    kw_const,
    kw_var,
    kw_orelse,
    kw_if,
    kw_else,
    kw_while,
    kw_for,
    kw_in,
    kw_break,
    kw_continue,
    kw_return,
    kw_try,
    kw_catch,
    kw_defer,
    kw_errdefer,
    kw_struct,
    kw_enum,
    kw_union,
    kw_switch,
    kw_asm,
    kw_error,
    // `test "name" { … }` — a top-level test-block declaration.
    kw_test,

    // ---- value keywords ----
    // The reserved literal words `true`, `false`, `null`, `undefined`, and
    // `unreachable`. They parse only in value position (parsePrimary) and
    // never as a bindable identifier, so a name cannot shadow them; each
    // lowers to the identical Zig keyword.
    kw_true,
    kw_false,
    kw_null,
    kw_undefined,
    kw_unreachable,

    // ---- primitive-type keywords ----
    // Reserved type-position words. Each names a Zig primitive and, being
    // reserved, cannot be shadowed by a binding:
    //   `noreturn`  — the empty return type
    //   `anytype`   — an inferred parameter type
    //   `anyopaque` — an incomplete pointee type
    // All three lower verbatim.
    kw_noreturn,
    kw_anytype,
    kw_anyopaque,

    // ---- punctuation ----
    l_paren,
    r_paren,
    l_brace,
    r_brace,
    l_bracket,
    r_bracket,
    comma,
    colon,
    // :=
    colon_equal,
    dot,
    equal,
    // ->
    arrow,
    // => — a switch prong separator
    fat_arrow,
    // *
    star,
    // a lone _
    underscore,

    // ---- operators ----
    // +
    plus,
    // ++ — array / slice concatenation
    plus_plus,
    // +% — wrapping addition
    plus_percent,
    // -
    minus,
    // -% — wrapping subtraction
    minus_percent,
    // /
    slash,
    // %
    percent,
    // *% — wrapping multiplication (the base `*` is `star`, under punctuation)
    star_percent,
    // ==
    eq_eq,
    // !=
    bang_eq,
    // <
    lt,
    // <=
    lt_eq,
    // <<
    lt_lt,
    // >
    gt,
    // >=
    gt_eq,
    // >>
    gt_gt,
    // &
    amp,
    // &&
    amp_amp,
    // |
    pipe,
    // ||
    pipe_pipe,
    // ^
    caret,
    // ~
    tilde,
    // !
    bang,
    // ?
    question,
    // ..
    dot_dot,
    // ... — an inclusive switch range (lo...hi)
    ellipsis3,

    // ---- compound assignment ----
    // +=
    plus_eq,
    // -=
    minus_eq,
    // *=
    star_eq,
    // /=
    slash_eq,
    // %=
    percent_eq,
    // &=
    amp_eq,
    // |=
    pipe_eq,
    // ^=
    caret_eq,
    // <<=
    lt_lt_eq,
    // >>=
    gt_gt_eq,

    // ---- trivia / control ----
    eof,
    invalid,
};
/// A single lexed token: its kind plus where its text sits in the source.
pub const Token = struct {
    kind: Kind,

    // Half-open byte span [start, end) into the source buffer. Storing the
    // span rather than a copied slice means the lexer allocates nothing, and
    // every token can still point back at the original text for diagnostics.
    start: u32,
    end: u32,

    // Source line associated with the token.
    // NOTE(review): whether this is 0- or 1-based is not visible here — confirm
    // against the lexer.
    line: u32,

    /// Returns the token's text: the bytes of `src` in `[start, end)`.
    ///
    /// `src` must be the buffer the token's span refers to; the slice is
    /// bounds-checked like any other Zig slice expression. The returned slice
    /// borrows from `src` and allocates nothing.
    pub fn lexeme(self: Token, src: []const u8) []const u8 {
        const from = self.start;
        const to = self.end;
        return src[from..to];
    }
};
/// Comptime-built lookup from a reserved word's spelling to its `Kind`.
///
/// Holds one entry per `kw_*` variant of `Kind`; a spelling absent from this
/// table is not a keyword. Presumably consulted by the lexer once it has
/// scanned an identifier-shaped lexeme — confirm at the call site.
pub const keywords = std.StaticStringMap(Kind).initComptime([_]struct { []const u8, Kind }{
    // modules and declarations
    .{ "use", .kw_use },
    .{ "as", .kw_as },
    .{ "link", .kw_link },
    .{ "fn", .kw_fn },
    .{ "export", .kw_export },
    .{ "extern", .kw_extern },
    .{ "callconv", .kw_callconv },
    .{ "align", .kw_align },
    .{ "pub", .kw_pub },
    .{ "inline", .kw_inline },
    .{ "comptime", .kw_comptime },
    .{ "const", .kw_const },
    .{ "var", .kw_var },

    // control flow and error handling
    .{ "orelse", .kw_orelse },
    .{ "if", .kw_if },
    .{ "else", .kw_else },
    .{ "while", .kw_while },
    .{ "for", .kw_for },
    .{ "in", .kw_in },
    .{ "break", .kw_break },
    .{ "continue", .kw_continue },
    .{ "return", .kw_return },
    .{ "try", .kw_try },
    .{ "catch", .kw_catch },
    .{ "defer", .kw_defer },
    .{ "errdefer", .kw_errdefer },

    // container and other declaration forms
    .{ "struct", .kw_struct },
    .{ "enum", .kw_enum },
    .{ "union", .kw_union },
    .{ "switch", .kw_switch },
    .{ "asm", .kw_asm },
    .{ "error", .kw_error },
    .{ "test", .kw_test },

    // reserved value words
    .{ "true", .kw_true },
    .{ "false", .kw_false },
    .{ "null", .kw_null },
    .{ "undefined", .kw_undefined },
    .{ "unreachable", .kw_unreachable },

    // reserved primitive-type words
    .{ "noreturn", .kw_noreturn },
    .{ "anytype", .kw_anytype },
    .{ "anyopaque", .kw_anyopaque },
});