// Source: ajhahn.de — Flash (128 lines)
// tokenize — the fsh command tokenizer, the first FlashOS module ported to
// Flash from its hand-written Zig.
//
// A whitespace splitter with an optional single `|` split. Pure: no syscalls,
// no allocator. A caller-owned argv array and scratch buffer are filled in
// place (fixed-size, no realloc); the tokenizer reports how the line
// decomposes. Tokens are maximal runs of non-whitespace, non-`|` bytes; the
// first `|` splits the line into a left and a right command. Each token is
// copied NUL-terminated into `buf` and its argv slot points there, with a
// `null` slot marking the pipe boundary and the line end so each side is an
// execve-ready NULL-terminated vector. Overflow truncates; a second `|`, or a
// `|` with an empty side, is a hard error.
//
// First port to exercise tagged-union result types end to end: `union(enum)`
// with mixed void and payload variants, union literals including a nested
// `.{ .piped = .{ … } }`, and bare enum-literal returns (`return .empty`). It
// also drives the composite signature `argv *mut [MAX_ARGS]?[*:0]mut u8`, the
// sentinel slice `buf[buf_pos .. buf_pos + tok.len :0].ptr`, an open-ended
// chained slice `buf[buf_pos..][0..tok.len]`, and compound-condition `while`
// scans. Doc comments are carried through verbatim. The host tests
// that accompany the reference are not part of this core port.

/// argv capacity, including the interleaved `null` separators (the pipe
/// boundary and the trailing terminator). 16 covers a command plus a
/// generous argument list for demoware; longer lines truncate.
///
/// `tokenize` stops collecting once `MAX_ARGS - 1` slots are filled, so the
/// last slot is always free for the trailing `null`: at most 15 tokens on a
/// pipe-free line, or 14 tokens in total when a `|` occupies a slot.
pub const MAX_ARGS usize = 16

/// Why `tokenize` rejected a line that contains `|`.
pub const Err = enum {
    /// A second `|` was encountered while scanning; only one pipe is
    /// supported per line.
    too_many_pipes,
    /// No tokens were collected on the left or on the right of the `|`.
    /// This includes a right side that was dropped entirely because argv
    /// or `buf` ran out of room.
    empty_side,
}

/// A single-pipe decomposition. The right command's argv begins at
/// `argv[left_argc + 1]` (the `+ 1` skips the `null` the tokenizer wrote
/// at the pipe boundary); both vectors are NULL-terminated in place.
pub const Piped = struct {
    /// Number of tokens before the `|`. Equal to the index of the `null`
    /// slot written at the pipe boundary. Never 0 in a value returned by
    /// `tokenize` (that case is reported as `Err.empty_side`).
    left_argc usize,
    /// Number of tokens after the `|`. Never 0 in a value returned by
    /// `tokenize` (that case is reported as `Err.empty_side`).
    right_argc usize,
}

/// How a line decomposed, as returned by `tokenize`.
pub const Result = union(enum) {
    /// Blank or whitespace-only line — fsh redraws the prompt. Also
    /// returned when no token could be stored at all (for example, the
    /// first token plus its NUL does not fit in `buf`).
    empty,
    /// One command, no pipe. The payload is `argc`: `argv[0..argc]` is
    /// valid and `argv[argc] == null`.
    single usize,
    /// One pipe stage with at least one token on each side; see `Piped`.
    piped Piped,
    /// Malformed pipe usage; see `Err` for the specific reason.
    err Err,
}

/// True for the bytes `tokenize` treats as token separators: space, tab,
/// carriage return, and line feed. `|` is not whitespace; `tokenize`
/// tests for it separately.
inline fn is_space(c u8) bool {
    return c == ' ' || c == '\t' || c == '\r' || c == '\n'
}

/// Tokenize `line` into NUL-terminated words stored in `buf`, recording a
/// pointer to each word in `argv`.
///
/// Words are maximal runs of bytes that are neither whitespace (per
/// `is_space`) nor `|`. The first `|` is recorded as a `null` argv slot
/// separating a left and a right command, and a further `null` is written
/// after the last word. Scanning stops early, keeping what was collected so
/// far, once `MAX_ARGS - 1` slots are in use or the next word plus its NUL
/// would not fit in `buf`.
///
/// Returns `.empty` when nothing was collected, `.single` with the word
/// count for a pipe-free line, `.piped` with the per-side counts, or `.err`
/// for a second `|` met while scanning (`too_many_pipes`) or a `|` with no
/// collected words on one side (`empty_side`).
///
/// `argv` and `buf` are caller-owned and are filled from the start on every
/// call, so pointers from an earlier call stay valid only until the buffers
/// are reused.
pub fn tokenize(line []u8, argv *mut [MAX_ARGS]?[*:0]mut u8, buf []mut u8) Result {
    var cursor usize = 0
    var n_slots usize = 0
    var n_pipes usize = 0
    var pipe_slot ?usize = null
    var write_at usize = 0

    while cursor < line.len {
        // Step over the whitespace in front of the next word or `|`.
        while cursor < line.len && is_space(line[cursor]) {
            cursor += 1
        }

        // Finished at end of line, or once the only slot left is the one
        // reserved for the trailing `null`.
        if cursor >= line.len || n_slots >= MAX_ARGS - 1 {
            break
        }

        if line[cursor] == '|' {
            // Only one pipe is supported; a second one rejects the line.
            if n_pipes >= 1 {
                return .{ .err = .too_many_pipes }
            }
            n_pipes += 1
            // The boundary takes an argv slot of its own, holding `null`,
            // so the left command is already a terminated vector.
            pipe_slot = n_slots
            argv[n_slots] = null
            n_slots += 1
            cursor += 1
            continue
        }

        // Advance to the end of the word: next whitespace, `|`, or EOL.
        word_start := cursor
        while cursor < line.len && !is_space(line[cursor]) && line[cursor] != '|' {
            cursor += 1
        }
        word := line[word_start..cursor]
        nul_at := write_at + word.len

        // The word and its terminating NUL must both fit; otherwise the
        // word and the rest of the line are dropped.
        if nul_at + 1 > buf.len {
            break
        }

        // Copy, terminate, then take the sentinel slice: the NUL has to be
        // in place before `:0` is applied.
        #memcpy(buf[write_at..][0..word.len], word)
        buf[nul_at] = 0
        argv[n_slots] = buf[write_at .. nul_at :0].ptr
        n_slots += 1
        write_at = nul_at + 1
    }

    // Terminate the (last) vector.
    if n_slots < MAX_ARGS {
        argv[n_slots] = null
    }

    if pipe_slot |at| {
        // `at` words precede the boundary; the boundary slot itself is not
        // counted on either side.
        right := n_slots - at - 1
        if at == 0 || right == 0 {
            return .{ .err = .empty_side }
        }
        return .{ .piped = .{ .left_argc = at, .right_argc = right } }
    }

    if n_slots == 0 {
        return .empty
    }
    return .{ .single = n_slots }
}