Flash 128 lines
// tokenize — the fsh command tokenizer, the first FlashOS module ported to
// Flash from its hand-written Zig.
//
// A whitespace splitter with an optional single `|` split. Pure: no syscalls,
// no allocator. A caller-owned argv array and scratch buffer are filled in
// place (fixed-size, no realloc); the tokenizer reports how the line
// decomposes. Tokens are maximal runs of non-whitespace, non-`|` bytes; the
// first `|` splits the line into a left and a right command. Each token is
// copied NUL-terminated into `buf` and its argv slot points there, with a
// `null` slot marking the pipe boundary and the line end so each side is an
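// execve-ready NULL-terminated vector. Overflow (argv or `buf` full) silently
// truncates the line at the last token that fit; a second `|`, or a `|` with
// an empty side, is a hard error. Truncation immediately after a `|` leaves
// the right side empty and is therefore reported as an empty side.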
//
// First port to exercise tagged-union result types end to end: `union(enum)`
// with mixed void and payload variants, union literals including a nested
// `.{ .piped = .{ … } }`, and bare enum-literal returns (`return .empty`). It
// also drives the composite signature `argv *mut [MAX_ARGS]?[*:0]mut u8`, the
// sentinel slice `buf[buf_pos .. buf_pos + tok.len :0].ptr`, an open-ended
// chained slice `buf[buf_pos..][0..tok.len]`, and compound-condition `while`
// scans. Doc comments are carried through verbatim. The host tests
// that accompany the reference are not part of this core port.
// `tokenize` stops accepting input once `argc` reaches `MAX_ARGS - 1`, so the
// final slot is always free for the terminating `null`. The pipe boundary
// `null` also consumes a slot: an unpiped line holds at most 15 tokens, a
// piped line at most 14 across both sides.
/// argv capacity, including the interleaved `null` separators (the pipe
/// boundary and the trailing terminator). 16 covers a command plus a
/// generous argument list for demoware; longer lines truncate.
pub const MAX_ARGS usize = 16
/// Why the two sides of a `|` cannot both be commands, or why a second
/// `|` appeared.
pub const Err = enum {
// A second `|` was encountered while scanning the line.
too_many_pipes,
// The left or the right side of the `|` ended up with zero tokens.
empty_side,
}
/// A single-pipe decomposition. The right command's argv begins at
/// `argv[left_argc + 1]` (the `+ 1` skips the `null` the tokenizer wrote
/// at the pipe boundary); both vectors are NULL-terminated in place.
pub const Piped = struct {
// Token count before the `|`; also the argv index of the boundary `null`.
left_argc usize,
// Token count after the `|` (total argc minus the left side and the
// boundary slot).
right_argc usize,
}
/// How a line decomposed.
pub const Result = union(enum) {
/// Blank or whitespace-only line — fsh redraws the prompt.
empty,
// Payload is the token count; `tokenize` only returns this with a count
// of at least 1 (a zero count is reported as `empty`).
/// One command; `argv[0..argc]` valid, `argv[argc] == null`.
single usize,
// `tokenize` only returns this when both sides hold at least one token;
// otherwise the line is reported as `err`.
/// One pipe stage; see `Piped`.
piped Piped,
/// Malformed pipe usage.
err Err,
}
// True for the four bytes the tokenizer treats as blank: space, tab, carriage
// return and line feed. Every other byte (including `|`) is not whitespace.
inline fn is_space(c u8) bool {
    // Horizontal blanks first, then the two line-ending bytes.
    if c == ' ' || c == '\t' {
        return true
    }
    return c == '\n' || c == '\r'
}
/// Split `line` into `argv` (pointers into `buf`). See the module header
/// for the decomposition rules. `argv` and `buf` are caller-owned and
/// reused per line; the returned pointers are valid until the next call
/// that reuses them.
pub fn tokenize(line []u8, argv *mut [MAX_ARGS]?[*:0]mut u8, buf []mut u8) Result {
    // cursor     — read position in `line`
    // argc       — argv slots written so far (tokens plus the pipe `null`)
    // buf_pos    — next free byte in `buf`
    // pipe_count — how many `|` have been seen
    // pipe_slot  — argv index holding the pipe-boundary `null`, if any
    var cursor usize = 0
    var argc usize = 0
    var buf_pos usize = 0
    var pipe_count usize = 0
    var pipe_slot ?usize = null

    while cursor < line.len {
        // Skip the blanks in front of the next token.
        while cursor < line.len && is_space(line[cursor]) {
            cursor += 1
        }
        // Stop when the line is exhausted, or when argv is full apart from
        // the one slot held back for the trailing `null` (truncation).
        if cursor >= line.len || argc >= MAX_ARGS - 1 {
            break
        }

        if line[cursor] == '|' {
            pipe_count += 1
            if pipe_count > 1 {
                return .{ .err = .too_many_pipes }
            }
            // The boundary `null` terminates the left-hand vector in place.
            argv[argc] = null
            pipe_slot = argc
            argc += 1
            cursor += 1
            continue
        }

        // A token runs up to the next blank, the next `|`, or end of line.
        tok_start := cursor
        while cursor < line.len && !is_space(line[cursor]) && line[cursor] != '|' {
            cursor += 1
        }
        tok := line[tok_start..cursor]

        // The token needs its bytes plus one NUL; if that does not fit, the
        // rest of the line is dropped (truncation).
        if buf_pos + tok.len + 1 > buf.len {
            break
        }
        #memcpy(buf[buf_pos..][0..tok.len], tok)
        buf[buf_pos + tok.len] = 0
        argv[argc] = buf[buf_pos .. buf_pos + tok.len :0].ptr
        buf_pos += tok.len + 1
        argc += 1
    }

    // Terminate the final (or only) vector.
    if argc < MAX_ARGS {
        argv[argc] = null
    }

    // Nothing was written at all: a `|` always occupies a slot, so a zero
    // argc also means no pipe was seen.
    if argc == 0 {
        return .empty
    }

    if pipe_slot |boundary| {
        // Everything before the boundary is the left command; everything
        // after it, minus the boundary slot itself, is the right command.
        right_argc := argc - boundary - 1
        if boundary == 0 || right_argc == 0 {
            return .{ .err = .empty_side }
        }
        return .{ .piped = .{ .left_argc = boundary, .right_argc = right_argc } }
    }

    return .{ .single = argc }
}