Flash 150 lines
// grep match core — the pure substring matcher /bin/grep drives.
//
// A tool-local seam in the spirit of fsh's tokenize.flash and flibc's pager:
// the one piece of grep worth host-testing in isolation, kept apart from the
// driver half (argv parsing, open/read, line assembly) that lives in
// tools/grep.flash. Pure by construction — no allocator, no module state, no
// SVC, no flibc dependency — so it compiles and runs on the host under
// `zig build test` exactly as it does on the device.
//
// The match is a plain windowed substring scan, optionally case-folded over
// ASCII: grep needs no regex for its first cut (the product vision asks only
// for literal-pattern line search). An empty pattern matches every line, the
// GNU grep convention a `grep '' FILE` relies on.
/// True when `line` contains `pat` as a contiguous substring. With
/// `ignore_case`, A–Z fold to a–z on both sides before comparing (ASCII only;
/// bytes >= 0x80 are matched verbatim). An empty `pat` matches every line; a
/// `pat` longer than `line` never matches. O(line.len * pat.len) — fine for the
/// short lines a serial-console grep sees.
pub fn lineContains(line []u8, pat []u8, ignore_case bool) bool {
if pat.len == 0 {
return true
}
if pat.len > line.len {
return false
}
var i usize = 0
// Last start offset that still leaves room for the whole pattern.
while i + pat.len <= line.len {
if windowEq(line[i .. i + pat.len], pat, ignore_case) {
return true
}
i += 1
}
return false
}
/// Byte-equality over two equal-length slices, folding case when asked. The
/// caller guarantees `a.len == pat.len`, so the scan walks `pat.len` bytes.
fn windowEq(a []u8, pat []u8, ignore_case bool) bool {
var j usize = 0
while j < pat.len {
var x = a[j]
var y = pat[j]
if ignore_case {
x = lower(x)
y = lower(y)
}
if x != y {
return false
}
j += 1
}
return true
}
/// ASCII lowercase fold: A–Z (0x41–0x5A) map to a–z by +0x20; every other byte
/// is returned unchanged. The guard keeps the add in range (max 'Z'+32 == 'z'),
/// so no overflow even with ReleaseSmall's traps compiled out.
fn lower(c u8) u8 {
return if (c >= 'A' && c <= 'Z') c + 32 else c
}
/// First offset >= `from` at which `needle` occurs in `hay`, or null if there is
/// none. The offset-returning sibling of lineContains, for /bin/edit's ctrl-W
/// search-from-cursor (grep itself stays on the bool form). Case-sensitive —
/// this search does not fold case. An empty `needle` returns null so a blank search
/// is an inert no-op rather than a match that never advances the cursor. Same
/// windowed scan as lineContains; O((hay.len - from) * needle.len).
pub fn find(hay []u8, needle []u8, from usize) ?usize {
if needle.len == 0 || from >= hay.len {
return null
}
var i usize = from
while i + needle.len <= hay.len {
if windowEq(hay[i .. i + needle.len], needle, false) {
return i
}
i += 1
}
return null
}
// ---- host tests ------------------------------------------------------------
const std = #import("std")
const testing = std.testing
test "substring hit anywhere in the line" {
try testing.expect(lineContains("hello world", "world", false))
try testing.expect(lineContains("hello world", "hello", false))
try testing.expect(lineContains("hello world", "lo wo", false))
}
test "no match returns false" {
try testing.expect(!lineContains("hello world", "xyz", false))
try testing.expect(!lineContains("abc", "abcd", false)) // pat longer than line
}
test "empty pattern matches every line, even empty" {
try testing.expect(lineContains("anything", "", false))
try testing.expect(lineContains("", "", false))
}
test "empty line matches only the empty pattern" {
try testing.expect(!lineContains("", "a", false))
}
test "case-sensitive by default" {
try testing.expect(!lineContains("Hello", "hello", false))
try testing.expect(lineContains("Hello", "Hello", false))
}
test "ignore_case folds both sides over ASCII" {
try testing.expect(lineContains("Hello World", "hello", true))
try testing.expect(lineContains("hello world", "WORLD", true))
try testing.expect(lineContains("MiXeD", "mixed", true))
}
test "ignore_case leaves non-letters and high bytes alone" {
try testing.expect(lineContains("a1b2c3", "1B2", true))
// 0x80 has no case; it must match itself, not fold.
const hi = [_]u8{ 'x', 0x80, 'y' }
try testing.expect(lineContains(&hi, &hi, true))
}
test "match at the very start and very end" {
try testing.expect(lineContains("abcdef", "ab", false))
try testing.expect(lineContains("abcdef", "ef", false))
try testing.expect(lineContains("abcdef", "abcdef", false))
}
test "find returns the first offset at or after from" {
try testing.expectEqual(#as(?usize, 0), find("abcabc", "abc", 0))
try testing.expectEqual(#as(?usize, 3), find("abcabc", "abc", 1)) // skip the first hit
try testing.expectEqual(#as(?usize, 2), find("xxneedle", "needle", 0))
}
test "find reports no match" {
try testing.expectEqual(#as(?usize, null), find("abc", "xyz", 0))
try testing.expectEqual(#as(?usize, null), find("abc", "abcd", 0)) // needle longer than hay
try testing.expectEqual(#as(?usize, null), find("abcabc", "abc", 4)) // no hit past offset 4
}
test "find with from past the end or an empty needle yields null" {
try testing.expectEqual(#as(?usize, null), find("abc", "a", 3))
try testing.expectEqual(#as(?usize, null), find("abc", "a", 99))
try testing.expectEqual(#as(?usize, null), find("abc", "", 0)) // blank search is inert
}